1use crate::model::encoding;
4use crate::model::filesystem::{FileMetadata, FileSystem, WriteOp};
5use crate::model::piece_tree::{
6 BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, PieceView, Position,
7 StringBuffer, TreeStats,
8};
9use crate::model::piece_tree_diff::PieceTreeDiff;
10use crate::primitives::grapheme;
11use anyhow::{Context, Result};
12use regex::bytes::Regex;
13use std::io::{self, Write};
14use std::ops::Range;
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17
18pub use encoding::Encoding;
20
/// Error signalling that a save hit a permissions wall: the full contents
/// were parked in a temp file so the caller can finish the install with sudo.
#[derive(Debug, Clone, PartialEq)]
pub struct SudoSaveRequired {
    /// Temp file that already holds the complete contents to install.
    pub temp_path: PathBuf,
    /// Destination the user asked to save to.
    pub dest_path: PathBuf,
    /// Owner uid captured from the destination's metadata (unix; 0 otherwise).
    pub uid: u32,
    /// Owner gid captured from the destination's metadata (unix; 0 otherwise).
    pub gid: u32,
    /// Permission bits captured from the destination's metadata.
    pub mode: u32,
}

impl std::fmt::Display for SudoSaveRequired {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let dest = self.dest_path.display();
        write!(
            f,
            "Permission denied saving to {}. Use sudo to complete the operation.",
            dest
        )
    }
}

impl std::error::Error for SudoSaveRequired {}
50
51#[derive(Debug, Clone, PartialEq)]
58pub struct LargeFileEncodingConfirmation {
59 pub path: PathBuf,
61 pub file_size: usize,
63 pub encoding: Encoding,
65}
66
67impl std::fmt::Display for LargeFileEncodingConfirmation {
68 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69 let size_mb = self.file_size as f64 / (1024.0 * 1024.0);
70 write!(
71 f,
72 "{} ({:.0} MB) requires full load. (l)oad, (e)ncoding, (C)ancel? ",
73 self.encoding.display_name(),
74 size_mb
75 )
76 }
77}
78
79impl std::error::Error for LargeFileEncodingConfirmation {}
80
/// One unit of work in an incremental line-feed scan over the piece tree.
/// NOTE(review): field semantics inferred from names; scanning code is not
/// visible in this section — confirm against the consumer.
#[derive(Debug, Clone)]
pub struct LineScanChunk {
    /// Index of the piece-tree leaf this chunk belongs to.
    pub leaf_index: usize,
    /// Length of the chunk in bytes.
    pub byte_len: usize,
    /// True if this chunk's line-feed count is already known.
    pub already_known: bool,
}
91
/// Files at or above this size (100 MiB) take the large-file load path
/// unless the caller supplies its own threshold (see `load_from_file`).
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// Chunk size (1 MiB) for incremental content loading.
/// NOTE(review): not referenced in this section — used by chunk-loading code.
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// Alignment (64 KiB) for chunk boundaries.
/// NOTE(review): not referenced in this section — confirm against users.
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
101
/// Tunables applied to a `TextBuffer` at construction time.
#[derive(Debug, Clone)]
pub struct BufferConfig {
    /// Estimated average line length in bytes.
    /// NOTE(review): consumers are not visible in this section — presumably
    /// used for line-count heuristics; confirm.
    pub estimated_line_length: usize,
}

impl Default for BufferConfig {
    fn default() -> Self {
        // 80 columns: the conventional terminal width.
        BufferConfig {
            estimated_line_length: 80,
        }
    }
}
117
/// Line terminator convention for a buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// Unix-style `\n` (the default).
    #[default]
    LF,
    /// Windows-style `\r\n`.
    CRLF,
    /// Classic-Mac-style `\r`.
    CR,
}

impl LineEnding {
    /// The literal terminator bytes for this ending.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LineEnding::LF => "\n",
            LineEnding::CRLF => "\r\n",
            LineEnding::CR => "\r",
        }
    }

    /// Human-readable name, e.g. for a status line.
    pub fn display_name(&self) -> &'static str {
        match *self {
            LineEnding::LF => "LF",
            LineEnding::CRLF => "CRLF",
            LineEnding::CR => "CR",
        }
    }
}
149
/// Plan describing how to materialize the buffer's contents on disk:
/// an ordered list of byte-range copies from the original file plus
/// literal inserts. Built by `build_write_recipe`.
struct WriteRecipe {
    /// Source file for `Copy` actions; `None` when everything is inserted.
    src_path: Option<PathBuf>,
    /// Literal byte runs referenced by `RecipeAction::Insert { index }`.
    insert_data: Vec<Vec<u8>>,
    /// The ordered write plan.
    actions: Vec<RecipeAction>,
}

/// One step of a `WriteRecipe`.
#[derive(Debug, Clone, Copy)]
enum RecipeAction {
    /// Copy `len` bytes starting at `offset` from the recipe's `src_path`.
    Copy { offset: u64, len: u64 },
    /// Write `insert_data[index]` verbatim.
    Insert { index: usize },
}
168
169impl WriteRecipe {
170 fn to_write_ops(&self) -> Vec<WriteOp<'_>> {
172 self.actions
173 .iter()
174 .map(|action| match action {
175 RecipeAction::Copy { offset, len } => WriteOp::Copy {
176 offset: *offset,
177 len: *len,
178 },
179 RecipeAction::Insert { index } => WriteOp::Insert {
180 data: &self.insert_data[*index],
181 },
182 })
183 .collect()
184 }
185
186 fn has_copy_ops(&self) -> bool {
188 self.actions
189 .iter()
190 .any(|a| matches!(a, RecipeAction::Copy { .. }))
191 }
192
193 fn flatten_inserts(&self) -> Vec<u8> {
196 let mut result = Vec::new();
197 for action in &self.actions {
198 if let RecipeAction::Insert { index } = action {
199 result.extend_from_slice(&self.insert_data[*index]);
200 }
201 }
202 result
203 }
204}
205
/// A zero-based line number that is either exact or derived from a cached
/// anchor line (used when exact counts are not yet known).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// Exact zero-based line number.
    Absolute(usize),
    /// Zero-based line number computed relative to a cached line.
    Relative {
        line: usize,
        from_cached_line: usize,
    },
}

impl LineNumber {
    /// The underlying zero-based line value, regardless of variant.
    pub fn value(&self) -> usize {
        match *self {
            LineNumber::Absolute(line) => line,
            LineNumber::Relative { line, .. } => line,
        }
    }

    /// Whether this is an `Absolute` line number.
    pub fn is_absolute(&self) -> bool {
        !self.is_relative()
    }

    /// Whether this is a `Relative` line number.
    pub fn is_relative(&self) -> bool {
        matches!(self, LineNumber::Relative { .. })
    }

    /// One-based display form; relative numbers carry a `~` prefix.
    pub fn format(&self) -> String {
        match *self {
            LineNumber::Absolute(line) => format!("{}", line + 1),
            LineNumber::Relative { line, .. } => format!("~{}", line + 1),
        }
    }
}
245
/// An editable text document backed by a piece tree over one or more
/// string buffers, with load/save logic covering encodings, line endings,
/// large lazily-loaded files, and permission-escalation flows.
pub struct TextBuffer {
    /// Filesystem abstraction used for all file I/O (swappable at runtime).
    fs: Arc<dyn FileSystem + Send + Sync>,

    /// Current document content.
    piece_tree: PieceTree,

    /// Piece-tree root captured at the last save; used to detect and diff
    /// changes since then (see `diff_since_saved`).
    saved_root: Arc<crate::model::piece_tree::PieceTreeNode>,

    /// Backing buffers addressed by `BufferLocation::buffer_id()`.
    buffers: Vec<StringBuffer>,

    /// Id to assign to the next buffer appended to `buffers`.
    next_buffer_id: usize,

    /// Associated file on disk, if any.
    file_path: Option<PathBuf>,

    /// True when content has changed since the last save.
    modified: bool,

    /// Set on every content modification; presumably consumed by the crash
    /// recovery service — confirm against callers outside this section.
    recovery_pending: bool,

    /// True when the buffer was loaded via the large-file path.
    large_file: bool,

    /// True once line feeds have been counted for the whole file; large
    /// files start out with unknown line counts.
    line_feeds_scanned: bool,

    /// True for binary content: no encoding or line-ending conversion.
    is_binary: bool,

    /// Line ending to use when saving.
    line_ending: LineEnding,

    /// Line ending detected on load / recorded at the last save; differing
    /// from `line_ending` forces conversion on save.
    original_line_ending: LineEnding,

    /// Encoding to use when saving.
    encoding: Encoding,

    /// Encoding detected on load / recorded at the last save.
    original_encoding: Encoding,

    /// On-disk size recorded at load and after each save; `None` when the
    /// buffer has no file yet.
    saved_file_size: Option<usize>,

    /// Wrapping counter bumped on every content change.
    version: u64,

    /// Construction/heuristic tunables.
    config: BufferConfig,
}
317
/// Point-in-time capture of buffer content state: the piece tree together
/// with its backing buffers and id counter.
/// NOTE(review): consumers (undo/redo?) are not visible in this section.
#[derive(Debug, Clone)]
pub struct BufferSnapshot {
    pub piece_tree: PieceTree,
    pub buffers: Vec<StringBuffer>,
    pub next_buffer_id: usize,
}
329
330impl TextBuffer {
331 pub fn new(_large_file_threshold: usize, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
334 let piece_tree = PieceTree::empty();
335 let line_ending = LineEnding::default();
336 let encoding = Encoding::default();
337 TextBuffer {
338 fs,
339 saved_root: piece_tree.root(),
340 piece_tree,
341 buffers: vec![StringBuffer::new(0, Vec::new())],
342 next_buffer_id: 1,
343 file_path: None,
344 modified: false,
345 recovery_pending: false,
346 large_file: false,
347 line_feeds_scanned: false,
348 is_binary: false,
349 line_ending,
350 original_line_ending: line_ending,
351 encoding,
352 original_encoding: encoding,
353 saved_file_size: None,
354 version: 0,
355 config: BufferConfig::default(),
356 }
357 }
358
359 pub fn new_with_path(
362 large_file_threshold: usize,
363 fs: Arc<dyn FileSystem + Send + Sync>,
364 path: PathBuf,
365 ) -> Self {
366 let mut buffer = Self::new(large_file_threshold, fs);
367 buffer.file_path = Some(path);
368 buffer
369 }
370
    /// Monotonic (wrapping) counter bumped on every content change.
    pub fn version(&self) -> u64 {
        self.version
    }
375
    /// The filesystem abstraction this buffer performs all I/O through.
    pub fn filesystem(&self) -> &Arc<dyn FileSystem + Send + Sync> {
        &self.fs
    }
380
    /// Replace the filesystem abstraction used for subsequent I/O.
    pub fn set_filesystem(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
        self.fs = fs;
    }
385
    /// Advance the change counter. Wrapping add so it can never overflow-panic.
    #[inline]
    fn bump_version(&mut self) {
        self.version = self.version.wrapping_add(1);
    }
390
    /// Record a content change: set the modified and recovery-pending flags
    /// and bump the version counter.
    #[inline]
    fn mark_content_modified(&mut self) {
        self.modified = true;
        self.recovery_pending = true;
        self.bump_version();
    }
397
398 fn from_bytes_raw(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
401 let bytes = content.len();
402
403 let line_ending = Self::detect_line_ending(&content);
405
406 let buffer = StringBuffer::new(0, content);
408 let line_feed_cnt = buffer.line_feed_count();
409
410 let piece_tree = if bytes > 0 {
411 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
412 } else {
413 PieceTree::empty()
414 };
415
416 let saved_root = piece_tree.root();
417
418 TextBuffer {
419 fs,
420 line_ending,
421 original_line_ending: line_ending,
422 encoding: Encoding::Utf8, original_encoding: Encoding::Utf8,
424 piece_tree,
425 saved_root,
426 buffers: vec![buffer],
427 next_buffer_id: 1,
428 file_path: None,
429 modified: false,
430 recovery_pending: false,
431 large_file: false,
432 line_feeds_scanned: false,
433 is_binary: true,
434 saved_file_size: Some(bytes),
435 version: 0,
436 config: BufferConfig::default(),
437 }
438 }
439
440 pub fn from_bytes(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
442 let (encoding, utf8_content) = Self::detect_and_convert_encoding(&content);
444
445 let bytes = utf8_content.len();
446
447 let line_ending = Self::detect_line_ending(&utf8_content);
449
450 let buffer = StringBuffer::new(0, utf8_content);
452 let line_feed_cnt = buffer.line_feed_count();
453
454 let piece_tree = if bytes > 0 {
455 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
456 } else {
457 PieceTree::empty()
458 };
459
460 let saved_root = piece_tree.root();
461
462 TextBuffer {
463 fs,
464 line_ending,
465 original_line_ending: line_ending,
466 encoding,
467 original_encoding: encoding,
468 piece_tree,
469 saved_root,
470 buffers: vec![buffer],
471 next_buffer_id: 1,
472 file_path: None,
473 modified: false,
474 recovery_pending: false,
475 large_file: false,
476 line_feeds_scanned: false,
477 is_binary: false,
478 saved_file_size: Some(bytes), version: 0,
480 config: BufferConfig::default(),
481 }
482 }
483
484 pub fn from_bytes_with_encoding(
486 content: Vec<u8>,
487 encoding: Encoding,
488 fs: Arc<dyn FileSystem + Send + Sync>,
489 ) -> Self {
490 let utf8_content = encoding::convert_to_utf8(&content, encoding);
492
493 let bytes = utf8_content.len();
494
495 let line_ending = Self::detect_line_ending(&utf8_content);
497
498 let buffer = StringBuffer::new(0, utf8_content);
500 let line_feed_cnt = buffer.line_feed_count();
501
502 let piece_tree = if bytes > 0 {
503 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
504 } else {
505 PieceTree::empty()
506 };
507
508 let saved_root = piece_tree.root();
509
510 TextBuffer {
511 fs,
512 line_ending,
513 original_line_ending: line_ending,
514 encoding,
515 original_encoding: encoding,
516 piece_tree,
517 saved_root,
518 buffers: vec![buffer],
519 next_buffer_id: 1,
520 file_path: None,
521 modified: false,
522 recovery_pending: false,
523 large_file: false,
524 line_feeds_scanned: false,
525 is_binary: false,
526 saved_file_size: Some(bytes),
527 version: 0,
528 config: BufferConfig::default(),
529 }
530 }
531
    /// Convenience: build a buffer from a `&str` (already valid UTF-8).
    /// The threshold parameter is unused since the content is in memory.
    pub fn from_str(
        s: &str,
        _large_file_threshold: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> Self {
        Self::from_bytes(s.as_bytes().to_vec(), fs)
    }
540
541 pub fn empty(fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
543 let piece_tree = PieceTree::empty();
544 let saved_root = piece_tree.root();
545 let line_ending = LineEnding::default();
546 let encoding = Encoding::default();
547 TextBuffer {
548 fs,
549 piece_tree,
550 saved_root,
551 buffers: vec![StringBuffer::new(0, Vec::new())],
552 next_buffer_id: 1,
553 file_path: None,
554 modified: false,
555 recovery_pending: false,
556 large_file: false,
557 line_feeds_scanned: false,
558 is_binary: false,
559 line_ending,
560 original_line_ending: line_ending,
561 encoding,
562 original_encoding: encoding,
563 saved_file_size: None,
564 version: 0,
565 config: BufferConfig::default(),
566 }
567 }
568
569 pub fn load_from_file<P: AsRef<Path>>(
571 path: P,
572 large_file_threshold: usize,
573 fs: Arc<dyn FileSystem + Send + Sync>,
574 ) -> anyhow::Result<Self> {
575 let path = path.as_ref();
576
577 let metadata = fs.metadata(path)?;
579 let file_size = metadata.size as usize;
580
581 let threshold = if large_file_threshold > 0 {
583 large_file_threshold
584 } else {
585 DEFAULT_LARGE_FILE_THRESHOLD
586 };
587
588 if file_size >= threshold {
590 Self::load_large_file(path, file_size, fs)
591 } else {
592 Self::load_small_file(path, fs)
593 }
594 }
595
596 pub fn load_from_file_with_encoding<P: AsRef<Path>>(
598 path: P,
599 encoding: Encoding,
600 fs: Arc<dyn FileSystem + Send + Sync>,
601 config: BufferConfig,
602 ) -> anyhow::Result<Self> {
603 let path = path.as_ref();
604 let contents = fs.read_file(path)?;
605
606 let mut buffer = Self::from_bytes_with_encoding(contents, encoding, fs);
607 buffer.file_path = Some(path.to_path_buf());
608 buffer.modified = false;
609 buffer.config = config;
610 Ok(buffer)
611 }
612
613 fn load_small_file(path: &Path, fs: Arc<dyn FileSystem + Send + Sync>) -> anyhow::Result<Self> {
615 let contents = fs.read_file(path)?;
616
617 let (encoding, is_binary) = Self::detect_encoding_or_binary(&contents);
619
620 let mut buffer = if is_binary {
622 Self::from_bytes_raw(contents, fs)
623 } else {
624 Self::from_bytes(contents, fs)
626 };
627 buffer.file_path = Some(path.to_path_buf());
628 buffer.modified = false;
629 buffer.large_file = false;
630 buffer.is_binary = is_binary;
631 if is_binary {
633 buffer.encoding = encoding;
634 buffer.original_encoding = encoding;
635 }
636 Ok(buffer)
638 }
639
640 pub fn check_large_file_encoding(
649 path: impl AsRef<Path>,
650 fs: Arc<dyn FileSystem + Send + Sync>,
651 ) -> anyhow::Result<Option<LargeFileEncodingConfirmation>> {
652 let path = path.as_ref();
653 let metadata = fs.metadata(path)?;
654 let file_size = metadata.size as usize;
655
656 if file_size < DEFAULT_LARGE_FILE_THRESHOLD {
658 return Ok(None);
659 }
660
661 let sample_size = file_size.min(8 * 1024);
663 let sample = fs.read_range(path, 0, sample_size)?;
664 let (encoding, is_binary) = Self::detect_encoding_or_binary(&sample);
665
666 if is_binary {
668 return Ok(None);
669 }
670
671 if encoding.requires_full_file_load() {
673 return Ok(Some(LargeFileEncodingConfirmation {
674 path: path.to_path_buf(),
675 file_size,
676 encoding,
677 }));
678 }
679
680 Ok(None)
681 }
682
    /// Load a file at or above the large-file threshold without forcing a
    /// full load; bails with `LargeFileEncodingConfirmation` if the detected
    /// encoding would require one. See `load_large_file_internal`.
    fn load_large_file(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> anyhow::Result<Self> {
        Self::load_large_file_internal(path, file_size, fs, false)
    }
694
695 pub fn load_large_file_confirmed(
700 path: impl AsRef<Path>,
701 fs: Arc<dyn FileSystem + Send + Sync>,
702 ) -> anyhow::Result<Self> {
703 let path = path.as_ref();
704 let metadata = fs.metadata(path)?;
705 let file_size = metadata.size as usize;
706 Self::load_large_file_internal(path, file_size, fs, true)
707 }
708
    /// Core large-file loader.
    ///
    /// Sniffs the first 8 KiB to classify the file:
    /// - binary: fully loaded, no encoding conversion;
    /// - encoding requiring a full load: bails with
    ///   `LargeFileEncodingConfirmation` unless `force_full_load` is set;
    /// - other non-UTF-8/ASCII encodings: fully loaded and converted;
    /// - UTF-8/ASCII: set up lazily as a single `Unloaded` piece so bytes
    ///   are read from disk on demand.
    fn load_large_file_internal(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
        force_full_load: bool,
    ) -> anyhow::Result<Self> {
        use crate::model::piece_tree::{BufferData, BufferLocation};

        // Classify from a small prefix only; the file may be huge.
        let sample_size = file_size.min(8 * 1024);
        let sample = fs.read_range(path, 0, sample_size)?;

        let (encoding, is_binary) = Self::detect_encoding_or_binary(&sample);

        if is_binary {
            tracing::info!("Large binary file detected, loading without encoding conversion");
            let contents = fs.read_file(path)?;
            let mut buffer = Self::from_bytes_raw(contents, fs);
            buffer.file_path = Some(path.to_path_buf());
            buffer.modified = false;
            buffer.large_file = true;
            // Keep the detected encoding; `from_bytes_raw` defaults to UTF-8.
            buffer.encoding = encoding;
            buffer.original_encoding = encoding;
            return Ok(buffer);
        }

        let requires_full_load = encoding.requires_full_file_load();

        // Let the caller confirm an expensive full load first.
        if requires_full_load && !force_full_load {
            anyhow::bail!(LargeFileEncodingConfirmation {
                path: path.to_path_buf(),
                file_size,
                encoding,
            });
        }

        if !matches!(encoding, Encoding::Utf8 | Encoding::Ascii) {
            tracing::info!(
                "Large file with non-UTF-8 encoding ({:?}), loading fully for conversion",
                encoding
            );
            let contents = fs.read_file(path)?;
            let mut buffer = Self::from_bytes(contents, fs);
            buffer.file_path = Some(path.to_path_buf());
            buffer.modified = false;
            buffer.large_file = true;
            buffer.is_binary = is_binary;
            return Ok(buffer);
        }

        // NOTE(review): line ending detected from the sample only — assumed
        // representative of the whole file.
        let line_ending = Self::detect_line_ending(&sample);

        // Lazy path: one unloaded buffer spanning the whole file on disk.
        let buffer = StringBuffer {
            id: 0,
            data: BufferData::Unloaded {
                file_path: path.to_path_buf(),
                file_offset: 0,
                bytes: file_size,
            },
            stored_file_offset: None,
        };

        // Line-feed count is unknown (None) until a scan runs.
        let piece_tree = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        let saved_root = piece_tree.root();

        tracing::debug!(
            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
            file_size,
            file_size
        );

        Ok(TextBuffer {
            fs,
            piece_tree,
            saved_root,
            buffers: vec![buffer],
            next_buffer_id: 1,
            file_path: Some(path.to_path_buf()),
            modified: false,
            recovery_pending: false,
            large_file: true,
            line_feeds_scanned: false,
            is_binary,
            line_ending,
            original_line_ending: line_ending,
            encoding,
            original_encoding: encoding,
            saved_file_size: Some(file_size),
            version: 0,
            config: BufferConfig::default(),
        })
    }
817
818 pub fn save(&mut self) -> anyhow::Result<()> {
820 if let Some(path) = &self.file_path {
821 self.save_to_file(path.clone())
822 } else {
823 anyhow::bail!(io::Error::new(
824 io::ErrorKind::NotFound,
825 "No file path associated with buffer",
826 ))
827 }
828 }
829
    /// Decide whether to overwrite the destination file in place rather than
    /// via a temp file. In-place is chosen exactly when this process does
    /// NOT own the destination — presumably because replacing the file would
    /// change its ownership, while writing in place preserves it (confirm).
    fn should_use_inplace_write(&self, dest_path: &Path) -> bool {
        !self.fs.is_owner(dest_path)
    }
839
    /// Build a `WriteRecipe` describing the bytes to write on save.
    ///
    /// When no line-ending/encoding conversion is needed and the buffer has
    /// an existing on-disk file, unchanged stored pieces become `Copy`
    /// actions against that file; everything else is read (and converted as
    /// needed) into literal `Insert` payloads. A BOM insert is prepended
    /// when the target encoding defines one.
    fn build_write_recipe(&self) -> io::Result<WriteRecipe> {
        let total = self.total_bytes();

        // Conversions force full rewrites: raw copies from the original file
        // would carry the old line endings / encoding. Content is held as
        // UTF-8 in memory (see `from_bytes*`), so any non-UTF-8/ASCII target
        // always needs conversion even when "unchanged".
        let needs_line_ending_conversion = self.line_ending != self.original_line_ending;
        let needs_encoding_conversion = !self.is_binary
            && (self.encoding != self.original_encoding
                || !matches!(self.encoding, Encoding::Utf8 | Encoding::Ascii));
        let needs_conversion = needs_line_ending_conversion || needs_encoding_conversion;

        // Only copy out of the original file when no conversion is needed
        // and the file actually exists.
        let src_path_for_copy: Option<&Path> = if needs_conversion {
            None
        } else {
            self.file_path.as_deref().filter(|p| self.fs.exists(p))
        };
        let target_ending = self.line_ending;
        let target_encoding = self.encoding;

        let mut insert_data: Vec<Vec<u8>> = Vec::new();
        let mut actions: Vec<RecipeAction> = Vec::new();

        // Byte-order mark first, if the target encoding has one.
        if let Some(bom) = target_encoding.bom_bytes() {
            insert_data.push(bom.to_vec());
            actions.push(RecipeAction::Insert { index: 0 });
        }

        for piece_view in self.piece_tree.iter_pieces_in_range(0, total) {
            let buffer_id = piece_view.location.buffer_id();
            let buffer = self.buffers.get(buffer_id).ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("Buffer {} not found", buffer_id),
                )
            })?;

            match &buffer.data {
                BufferData::Unloaded {
                    file_path,
                    file_offset,
                    ..
                } => {
                    // A stored piece whose backing file is the copy source
                    // can be expressed as a raw byte-range copy.
                    let can_copy = matches!(piece_view.location, BufferLocation::Stored(_))
                        && src_path_for_copy.is_some_and(|src| file_path == src);

                    if can_copy {
                        let src_offset = (*file_offset + piece_view.buffer_offset) as u64;
                        actions.push(RecipeAction::Copy {
                            offset: src_offset,
                            len: piece_view.bytes as u64,
                        });
                        continue;
                    }

                    // Otherwise pull the bytes in and convert as required.
                    let data = self.fs.read_range(
                        file_path,
                        (*file_offset + piece_view.buffer_offset) as u64,
                        piece_view.bytes,
                    )?;

                    let data = if needs_line_ending_conversion {
                        Self::convert_line_endings_to(&data, target_ending)
                    } else {
                        data
                    };

                    let data = if needs_encoding_conversion {
                        Self::convert_to_encoding(&data, target_encoding)
                    } else {
                        data
                    };

                    let index = insert_data.len();
                    insert_data.push(data);
                    actions.push(RecipeAction::Insert { index });
                }

                BufferData::Loaded { data, .. } => {
                    // In-memory piece: slice it out and convert as required.
                    let start = piece_view.buffer_offset;
                    let end = start + piece_view.bytes;
                    let chunk = &data[start..end];

                    let chunk = if needs_line_ending_conversion {
                        Self::convert_line_endings_to(chunk, target_ending)
                    } else {
                        chunk.to_vec()
                    };

                    let chunk = if needs_encoding_conversion {
                        Self::convert_to_encoding(&chunk, target_encoding)
                    } else {
                        chunk
                    };

                    let index = insert_data.len();
                    insert_data.push(chunk);
                    actions.push(RecipeAction::Insert { index });
                }
            }
        }

        Ok(WriteRecipe {
            src_path: src_path_for_copy.map(|p| p.to_path_buf()),
            insert_data,
            actions,
        })
    }
976
977 fn create_temp_file(
983 &self,
984 dest_path: &Path,
985 ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
986 let same_dir_temp = self.fs.temp_path_for(dest_path);
988 match self.fs.create_file(&same_dir_temp) {
989 Ok(file) => Ok((same_dir_temp, file)),
990 Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
991 let temp_path = self.fs.unique_temp_path(dest_path);
993 let file = self.fs.create_file(&temp_path)?;
994 Ok((temp_path, file))
995 }
996 Err(e) => Err(e),
997 }
998 }
999
1000 fn create_recovery_temp_file(
1003 &self,
1004 dest_path: &Path,
1005 ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
1006 let recovery_dir = crate::input::input_history::get_data_dir()
1008 .map(|d| d.join("recovery"))
1009 .unwrap_or_else(|_| std::env::temp_dir());
1010
1011 self.fs.create_dir_all(&recovery_dir)?;
1013
1014 let file_name = dest_path
1016 .file_name()
1017 .unwrap_or_else(|| std::ffi::OsStr::new("fresh-save"));
1018 let timestamp = std::time::SystemTime::now()
1019 .duration_since(std::time::UNIX_EPOCH)
1020 .map(|d| d.as_nanos())
1021 .unwrap_or(0);
1022 let pid = std::process::id();
1023
1024 let temp_name = format!(
1025 ".inplace-{}-{}-{}.tmp",
1026 file_name.to_string_lossy(),
1027 pid,
1028 timestamp
1029 );
1030 let temp_path = recovery_dir.join(temp_name);
1031
1032 let file = self.fs.create_file(&temp_path)?;
1033 Ok((temp_path, file))
1034 }
1035
1036 fn inplace_recovery_meta_path(&self, dest_path: &Path) -> PathBuf {
1039 let recovery_dir = crate::input::input_history::get_data_dir()
1040 .map(|d| d.join("recovery"))
1041 .unwrap_or_else(|_| std::env::temp_dir());
1042
1043 let hash = crate::services::recovery::path_hash(dest_path);
1044 recovery_dir.join(format!("{}.inplace.json", hash))
1045 }
1046
    /// Serialize an `InplaceWriteRecovery` record (destination, staged temp
    /// file, and ownership/mode to restore) as JSON at `meta_path`.
    /// Missing metadata falls back to uid/gid 0 and mode 0o644.
    fn write_inplace_recovery_meta(
        &self,
        meta_path: &Path,
        dest_path: &Path,
        temp_path: &Path,
        original_metadata: &Option<FileMetadata>,
    ) -> io::Result<()> {
        // Ownership/permission metadata only exists on unix targets.
        #[cfg(unix)]
        let (uid, gid, mode) = original_metadata
            .as_ref()
            .map(|m| {
                (
                    m.uid.unwrap_or(0),
                    m.gid.unwrap_or(0),
                    m.permissions.as_ref().map(|p| p.mode()).unwrap_or(0o644),
                )
            })
            .unwrap_or((0, 0, 0o644));
        #[cfg(not(unix))]
        let (uid, gid, mode) = (0u32, 0u32, 0o644u32);

        let recovery = crate::services::recovery::InplaceWriteRecovery::new(
            dest_path.to_path_buf(),
            temp_path.to_path_buf(),
            uid,
            gid,
            mode,
        );

        let json = serde_json::to_string_pretty(&recovery)
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        self.fs.write_file(meta_path, json.as_bytes())
    }
1083
    /// Save the buffer's contents to `path`, choosing a write strategy:
    /// in-place write (non-owned local files), a plain whole-file write, or
    /// a patched write that copies unchanged byte ranges from the original
    /// file. On `PermissionDenied` the contents are staged to a temp file
    /// and a `SudoSaveRequired` error is returned so the caller can escalate.
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Empty buffer: just truncate/create the destination.
        if total == 0 {
            self.fs.write_file(dest_path, &[])?;
            self.finalize_save(dest_path)?;
            return Ok(());
        }

        let recipe = self.build_write_recipe()?;
        let ops = recipe.to_write_ops();

        // In-place writes are only attempted on local filesystems.
        let is_local = self.fs.remote_connection_info().is_none();
        let use_inplace = is_local && self.should_use_inplace_write(dest_path);

        if use_inplace {
            self.save_with_inplace_write(dest_path, &recipe)?;
        } else if !recipe.has_copy_ops() && !is_local {
            // Remote, no copy ranges: ship the full contents in one write.
            let data = recipe.flatten_inserts();
            self.fs.write_file(dest_path, &data)?;
        } else if is_local {
            let write_result = if !recipe.has_copy_ops() {
                let data = recipe.flatten_inserts();
                self.fs.write_file(dest_path, &data)
            } else {
                // Patch against the recipe's source (or the destination
                // itself when no separate source exists).
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
                self.fs.write_patched(src_for_patch, dest_path, &ops)
            };

            if let Err(e) = write_result {
                if e.kind() == io::ErrorKind::PermissionDenied {
                    // Stage the full contents into a temp file and hand back
                    // a SudoSaveRequired for the caller to finish with sudo.
                    let original_metadata = self.fs.metadata_if_exists(dest_path);
                    let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
                    self.write_recipe_to_file(&mut temp_file, &recipe)?;
                    temp_file.sync_all()?;
                    drop(temp_file);
                    return Err(self.make_sudo_error(temp_path, dest_path, original_metadata));
                }
                return Err(e.into());
            }
        } else {
            // Remote with copy ranges: let the filesystem apply the patch.
            let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
            self.fs.write_patched(src_for_patch, dest_path, &ops)?;
        }

        self.finalize_save(dest_path)?;
        Ok(())
    }
1156
    /// Write to `dest_path` in place (without replacing the inode), staging
    /// through a recovery temp file when the recipe copies bytes out of the
    /// destination itself. A JSON recovery sidecar is written before the
    /// overwrite so an interrupted write can be completed later.
    fn save_with_inplace_write(
        &self,
        dest_path: &Path,
        recipe: &WriteRecipe,
    ) -> anyhow::Result<()> {
        let original_metadata = self.fs.metadata_if_exists(dest_path);

        // No copy ops: the full contents are in memory; write them directly.
        if !recipe.has_copy_ops() {
            let data = recipe.flatten_inserts();
            return self.write_data_inplace(dest_path, &data, original_metadata);
        }

        // The recipe reads ranges from the destination file, so materialize
        // the final contents into a temp file before overwriting it.
        let (temp_path, mut temp_file) = self.create_recovery_temp_file(dest_path)?;
        if let Err(e) = self.write_recipe_to_file(&mut temp_file, recipe) {
            #[allow(clippy::let_underscore_must_use)]
            let _ = self.fs.remove_file(&temp_path);
            return Err(e.into());
        }
        temp_file.sync_all()?;
        drop(temp_file);

        // Best-effort sidecar recording how to finish the overwrite after a
        // crash; failure to write it does not abort the save.
        let recovery_meta_path = self.inplace_recovery_meta_path(dest_path);
        #[allow(clippy::let_underscore_must_use)]
        let _ = self.write_inplace_recovery_meta(
            &recovery_meta_path,
            dest_path,
            &temp_path,
            &original_metadata,
        );

        match self.fs.open_file_for_write(dest_path) {
            Ok(mut out_file) => {
                if let Err(e) = self.stream_file_to_writer(&temp_path, &mut out_file) {
                    // Temp file and sidecar are intentionally left behind,
                    // presumably so recovery can complete the write.
                    return Err(e.into());
                }
                out_file.sync_all()?;
                // Success: clean up the staging file and the sidecar.
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&temp_path);
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Ok(())
            }
            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                // The temp file becomes the sudo hand-off payload; the
                // sidecar is no longer needed.
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
            }
            Err(e) => {
                // Other open failures: leave temp + sidecar for recovery.
                Err(e.into())
            }
        }
    }
1237
1238 fn write_data_inplace(
1240 &self,
1241 dest_path: &Path,
1242 data: &[u8],
1243 original_metadata: Option<FileMetadata>,
1244 ) -> anyhow::Result<()> {
1245 match self.fs.open_file_for_write(dest_path) {
1246 Ok(mut out_file) => {
1247 out_file.write_all(data)?;
1248 out_file.sync_all()?;
1249 Ok(())
1250 }
1251 Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
1252 let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
1254 temp_file.write_all(data)?;
1255 temp_file.sync_all()?;
1256 drop(temp_file);
1257 Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
1258 }
1259 Err(e) => Err(e.into()),
1260 }
1261 }
1262
1263 fn stream_file_to_writer(
1265 &self,
1266 src_path: &Path,
1267 out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1268 ) -> io::Result<()> {
1269 const CHUNK_SIZE: usize = 1024 * 1024; let file_size = self.fs.metadata(src_path)?.size;
1272 let mut offset = 0u64;
1273
1274 while offset < file_size {
1275 let remaining = file_size - offset;
1276 let chunk_len = std::cmp::min(remaining, CHUNK_SIZE as u64) as usize;
1277 let chunk = self.fs.read_range(src_path, offset, chunk_len)?;
1278 out_file.write_all(&chunk)?;
1279 offset += chunk_len as u64;
1280 }
1281
1282 Ok(())
1283 }
1284
1285 fn write_recipe_to_file(
1287 &self,
1288 out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1289 recipe: &WriteRecipe,
1290 ) -> io::Result<()> {
1291 for action in &recipe.actions {
1292 match action {
1293 RecipeAction::Copy { offset, len } => {
1294 let src_path = recipe.src_path.as_ref().ok_or_else(|| {
1296 io::Error::new(io::ErrorKind::InvalidData, "Copy action without source")
1297 })?;
1298 let data = self.fs.read_range(src_path, *offset, *len as usize)?;
1299 out_file.write_all(&data)?;
1300 }
1301 RecipeAction::Insert { index } => {
1302 out_file.write_all(&recipe.insert_data[*index])?;
1303 }
1304 }
1305 }
1306 Ok(())
1307 }
1308
    /// Post-save bookkeeping: refresh the recorded size/path, consolidate
    /// the piece tree against the freshly written file, record the saved
    /// snapshot, and make the current line ending/encoding the new baseline.
    fn finalize_save(&mut self, dest_path: &Path) -> anyhow::Result<()> {
        let new_size = self.fs.metadata(dest_path)?.size as usize;
        tracing::debug!(
            "Buffer::save: updating saved_file_size from {:?} to {}",
            self.saved_file_size,
            new_size
        );
        self.saved_file_size = Some(new_size);
        self.file_path = Some(dest_path.to_path_buf());

        // Collapse the tree/buffers to mirror the on-disk file before the
        // snapshot is taken, so the snapshot reflects the consolidated state.
        self.consolidate_after_save(dest_path, new_size);

        self.mark_saved_snapshot();
        self.original_line_ending = self.line_ending;
        self.original_encoding = self.encoding;
        Ok(())
    }
1329
    /// Same bookkeeping as `finalize_save`, exposed for saves performed
    /// outside this buffer (e.g. a sudo-assisted write): refresh size/path,
    /// consolidate, snapshot, and update the ending/encoding baseline.
    pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
        let new_size = self.fs.metadata(&dest_path)?.size as usize;
        self.saved_file_size = Some(new_size);
        self.file_path = Some(dest_path.clone());

        self.consolidate_after_save(&dest_path, new_size);

        self.mark_saved_snapshot();
        self.original_line_ending = self.line_ending;
        self.original_encoding = self.encoding;
        Ok(())
    }
1346
1347 fn consolidate_after_save(&mut self, path: &Path, file_size: usize) {
1351 if self.large_file {
1352 self.consolidate_large_file(path, file_size);
1353 } else {
1354 self.consolidate_small_file();
1355 }
1356 }
1357
1358 fn consolidate_large_file(&mut self, path: &Path, file_size: usize) {
1362 let preserved_lf = if self.line_feeds_scanned {
1364 self.piece_tree.line_count().map(|c| c.saturating_sub(1))
1365 } else {
1366 None
1367 };
1368
1369 let buffer = StringBuffer {
1370 id: 0,
1371 data: BufferData::Unloaded {
1372 file_path: path.to_path_buf(),
1373 file_offset: 0,
1374 bytes: file_size,
1375 },
1376 stored_file_offset: None,
1377 };
1378
1379 self.piece_tree = if file_size > 0 {
1380 PieceTree::new(BufferLocation::Stored(0), 0, file_size, preserved_lf)
1381 } else {
1382 PieceTree::empty()
1383 };
1384
1385 self.buffers = vec![buffer];
1386 self.next_buffer_id = 1;
1387
1388 tracing::debug!(
1389 "Buffer::consolidate_large_file: consolidated into single piece of {} bytes",
1390 file_size
1391 );
1392 }
1393
1394 fn consolidate_small_file(&mut self) {
1396 if let Some(bytes) = self.get_all_text() {
1397 let line_feed_cnt = bytes.iter().filter(|&&b| b == b'\n').count();
1398 let len = bytes.len();
1399
1400 let buffer = StringBuffer::new_loaded(0, bytes, true);
1402
1403 self.piece_tree = if len > 0 {
1404 PieceTree::new(BufferLocation::Stored(0), 0, len, Some(line_feed_cnt))
1405 } else {
1406 PieceTree::empty()
1407 };
1408
1409 self.buffers = vec![buffer];
1410 self.next_buffer_id = 1;
1411
1412 tracing::debug!(
1413 "Buffer::consolidate_small_file: consolidated into single loaded buffer of {} bytes",
1414 len
1415 );
1416 }
1417 }
1418
    /// Build the `SudoSaveRequired` error returned when a save hit a
    /// permission wall, capturing the destination's ownership and mode so
    /// they can be restored (unix; zeros elsewhere or without metadata).
    fn make_sudo_error(
        &self,
        temp_path: PathBuf,
        dest_path: &Path,
        original_metadata: Option<FileMetadata>,
    ) -> anyhow::Error {
        #[cfg(unix)]
        let (uid, gid, mode) = if let Some(ref meta) = original_metadata {
            (
                meta.uid.unwrap_or(0),
                meta.gid.unwrap_or(0),
                // Keep permission bits only (mask off file-type bits).
                meta.permissions
                    .as_ref()
                    .map(|p| p.mode() & 0o7777)
                    .unwrap_or(0),
            )
        } else {
            (0, 0, 0)
        };
        #[cfg(not(unix))]
        let (uid, gid, mode) = (0u32, 0u32, 0u32);

        // Consume the metadata so non-unix builds don't warn about it
        // being unused.
        let _ = original_metadata;
        anyhow::anyhow!(SudoSaveRequired {
            temp_path,
            dest_path: dest_path.to_path_buf(),
            uid,
            gid,
            mode,
        })
    }
1452
    /// Total length of the document in bytes.
    pub fn total_bytes(&self) -> usize {
        self.piece_tree.total_bytes()
    }
1457
    /// Number of lines, or `None` while line feeds have not yet been fully
    /// counted (large, lazily loaded files).
    pub fn line_count(&self) -> Option<usize> {
        self.piece_tree.line_count()
    }
1464
    /// Record the current tree as the saved state and clear the modified flag.
    pub fn mark_saved_snapshot(&mut self) {
        self.saved_root = self.piece_tree.root();
        self.modified = false;
    }
1470
    /// Re-point `saved_root` at the current root when unmodified, so later
    /// diffs can take the `Arc::ptr_eq` fast path (see `diff_since_saved`).
    pub fn refresh_saved_root_if_unmodified(&mut self) {
        if !self.modified {
            self.saved_root = self.piece_tree.root();
        }
    }
1480
    /// Rewrite `saved_root` after a chunk of an unloaded buffer was loaded
    /// into a new in-memory buffer, so the saved snapshot keeps pointing at
    /// equivalent content.
    ///
    /// Every leaf referencing `old_buffer_id` that overlaps the chunk
    /// `[chunk_offset_in_buffer, chunk_offset_in_buffer + chunk_bytes)` is
    /// split into up to three pieces: the part before the chunk (unchanged
    /// location), the overlapped part (redirected to `new_buffer_id` with a
    /// chunk-relative offset), and the part after the chunk.
    fn apply_chunk_load_to_saved_root(
        &mut self,
        old_buffer_id: usize,
        chunk_offset_in_buffer: usize,
        chunk_bytes: usize,
        new_buffer_id: usize,
    ) {
        use crate::model::piece_tree::{LeafData, PieceTree};

        let mut leaves = Vec::new();
        self.saved_root.collect_leaves(&mut leaves);

        let mut modified = false;
        // +2: a single leaf can split into at most three pieces.
        let mut new_leaves: Vec<LeafData> = Vec::with_capacity(leaves.len() + 2);

        for leaf in &leaves {
            // Leaves in other buffers are untouched.
            if leaf.location.buffer_id() != old_buffer_id {
                new_leaves.push(*leaf);
                continue;
            }

            // All offsets below are in old-buffer coordinates.
            let leaf_start = leaf.offset;
            let leaf_end = leaf.offset + leaf.bytes;
            let chunk_start = chunk_offset_in_buffer;
            let chunk_end = chunk_offset_in_buffer + chunk_bytes;

            // No overlap with the loaded chunk: keep the leaf as-is.
            if chunk_start >= leaf_end || chunk_end <= leaf_start {
                new_leaves.push(*leaf);
                continue;
            }

            modified = true;

            // Head: the part of the leaf before the chunk stays in place.
            if chunk_start > leaf_start {
                new_leaves.push(LeafData::new(
                    leaf.location,
                    leaf.offset,
                    chunk_start - leaf_start,
                    None,
                ));
            }

            // Middle: the overlapped span moves to the new buffer; its
            // offset becomes relative to the start of the chunk.
            let actual_start = chunk_start.max(leaf_start);
            let actual_end = chunk_end.min(leaf_end);
            let offset_in_chunk = actual_start - chunk_start;
            new_leaves.push(LeafData::new(
                BufferLocation::Added(new_buffer_id),
                offset_in_chunk,
                actual_end - actual_start,
                None,
            ));

            // Tail: the part of the leaf after the chunk stays in place.
            if chunk_end < leaf_end {
                new_leaves.push(LeafData::new(
                    leaf.location,
                    chunk_end,
                    leaf_end - chunk_end,
                    None,
                ));
            }
        }

        // Rebuild only when something actually changed.
        if modified {
            self.saved_root = PieceTree::from_leaves(&new_leaves).root();
        }
    }
1559
    /// Compute what changed relative to the saved snapshot.
    ///
    /// Fast paths: an unmodified buffer or identical root pointers return an
    /// "equal" diff immediately. Otherwise a structural tree diff runs; when
    /// it reports a small change (≤ 64 KiB total), the bytes are verified to
    /// filter out structure-only churn (re-pieced but identical content).
    pub fn diff_since_saved(&self) -> PieceTreeDiff {
        let _span = tracing::info_span!(
            "diff_since_saved",
            large_file = self.large_file,
            modified = self.modified,
            lf_scanned = self.line_feeds_scanned
        )
        .entered();

        // Fast path 1: the modified flag says nothing changed.
        if !self.modified {
            tracing::trace!("diff_since_saved: not modified → equal");
            return PieceTreeDiff {
                equal: true,
                byte_ranges: Vec::new(),
                line_ranges: Some(Vec::new()),
                nodes_visited: 0,
            };
        }

        // Fast path 2: same root node ⇒ trees are identical.
        if Arc::ptr_eq(&self.saved_root, &self.piece_tree.root()) {
            tracing::trace!("diff_since_saved: Arc::ptr_eq fast path → equal");
            return PieceTreeDiff {
                equal: true,
                byte_ranges: Vec::new(),
                line_ranges: Some(Vec::new()),
                nodes_visited: 0,
            };
        }

        let structure_diff = self.diff_trees_by_structure();

        if structure_diff.equal {
            tracing::trace!(
                "diff_since_saved: structure equal, line_ranges={}",
                structure_diff
                    .line_ranges
                    .as_ref()
                    .map_or("None".to_string(), |r| format!("Some({})", r.len()))
            );
            return structure_diff;
        }

        let total_changed_bytes: usize = structure_diff
            .byte_ranges
            .iter()
            .map(|r| r.end.saturating_sub(r.start))
            .sum();

        // Small structural changes are byte-verified so that structure-only
        // differences (same content, different pieces) report as equal.
        const MAX_VERIFY_BYTES: usize = 64 * 1024;
        if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
            if self.verify_content_differs_in_ranges(&structure_diff.byte_ranges) {
                tracing::trace!(
                    "diff_since_saved: content differs, byte_ranges={}, line_ranges={}",
                    structure_diff.byte_ranges.len(),
                    structure_diff
                        .line_ranges
                        .as_ref()
                        .map_or("None".to_string(), |r| format!("Some({})", r.len()))
                );
                return structure_diff;
            } else {
                return PieceTreeDiff {
                    equal: true,
                    byte_ranges: Vec::new(),
                    line_ranges: Some(Vec::new()),
                    nodes_visited: structure_diff.nodes_visited,
                };
            }
        }

        // Change too large to verify byte-by-byte: trust the structure diff.
        tracing::info!(
            "diff_since_saved: large change, byte_ranges={}, line_ranges={}, nodes_visited={}",
            structure_diff.byte_ranges.len(),
            structure_diff
                .line_ranges
                .as_ref()
                .map_or("None".to_string(), |r| format!("Some({})", r.len())),
            structure_diff.nodes_visited
        );
        structure_diff
    }
1672
1673 fn verify_content_differs_in_ranges(&self, byte_ranges: &[std::ops::Range<usize>]) -> bool {
1676 let saved_bytes = self.tree_total_bytes(&self.saved_root);
1677 let current_bytes = self.piece_tree.total_bytes();
1678
1679 if saved_bytes != current_bytes {
1681 return true;
1682 }
1683
1684 for range in byte_ranges {
1686 if range.start >= range.end {
1687 continue;
1688 }
1689
1690 let saved_slice =
1692 self.extract_range_from_tree(&self.saved_root, range.start, range.end);
1693 let current_slice = self.get_text_range(range.start, range.end);
1695
1696 match (saved_slice, current_slice) {
1697 (Some(saved), Some(current)) => {
1698 if saved != current {
1699 return true; }
1701 }
1702 _ => {
1703 return true;
1705 }
1706 }
1707 }
1708
1709 false
1711 }
1712
1713 fn extract_range_from_tree(
1715 &self,
1716 root: &Arc<crate::model::piece_tree::PieceTreeNode>,
1717 start: usize,
1718 end: usize,
1719 ) -> Option<Vec<u8>> {
1720 let mut result = Vec::with_capacity(end.saturating_sub(start));
1721 self.collect_range_from_node(root, start, end, 0, &mut result)?;
1722 Some(result)
1723 }
1724
    /// Recursive helper for [`Self::extract_range_from_tree`]: append the
    /// bytes of `[range_start, range_end)` that fall under `node` to
    /// `result`. `node_offset` is the document offset where this subtree
    /// begins. Returns `None` if buffer data for a touched leaf is missing.
    fn collect_range_from_node(
        &self,
        node: &Arc<crate::model::piece_tree::PieceTreeNode>,
        range_start: usize,
        range_end: usize,
        node_offset: usize,
        result: &mut Vec<u8>,
    ) -> Option<()> {
        use crate::model::piece_tree::PieceTreeNode;

        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Document offset where the right subtree starts.
                let left_end = node_offset + left_bytes;

                // Descend left only if the range begins before the split.
                if range_start < left_end {
                    self.collect_range_from_node(
                        left,
                        range_start,
                        range_end,
                        node_offset,
                        result,
                    )?;
                }

                // Descend right only if the range extends past the split.
                if range_end > left_end {
                    self.collect_range_from_node(right, range_start, range_end, left_end, result)?;
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                ..
            } => {
                let node_end = node_offset + bytes;

                // Only act if the leaf overlaps the requested range.
                if range_start < node_end && range_end > node_offset {
                    let buf = self.buffers.get(location.buffer_id())?;
                    let data = buf.get_data()?;

                    // Clamp the range to this leaf, in leaf-local coordinates.
                    let leaf_start = range_start.saturating_sub(node_offset);
                    let leaf_end = (range_end - node_offset).min(*bytes);

                    if leaf_start < leaf_end {
                        let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
                        result.extend_from_slice(slice);
                    }
                }
            }
        }
        Some(())
    }
1787
1788 fn tree_total_bytes(&self, root: &Arc<crate::model::piece_tree::PieceTreeNode>) -> usize {
1790 use crate::model::piece_tree::PieceTreeNode;
1791 match root.as_ref() {
1792 PieceTreeNode::Internal {
1793 left_bytes, right, ..
1794 } => left_bytes + self.tree_total_bytes(right),
1795 PieceTreeNode::Leaf { bytes, .. } => *bytes,
1796 }
1797 }
1798
1799 fn diff_trees_by_structure(&self) -> PieceTreeDiff {
1801 crate::model::piece_tree_diff::diff_piece_trees(
1802 &self.saved_root,
1803 &self.piece_tree.root(),
1804 &|leaf, start, len| {
1805 if len == 0 {
1806 return Some(0);
1807 }
1808 if let Some(buf) = self.buffers.get(leaf.location.buffer_id()) {
1810 if let Some(data) = buf.get_data() {
1811 let start = leaf.offset + start;
1812 let end = start + len;
1813 if let Some(slice) = data.get(start..end) {
1814 let line_feeds = slice.iter().filter(|&&b| b == b'\n').count();
1815 return Some(line_feeds);
1816 }
1817 }
1818 }
1819 if start == 0 && len == leaf.bytes {
1823 leaf.line_feed_cnt.map(|c| c)
1824 } else {
1825 tracing::warn!(
1826 "diff line_counter: returning None for partial leaf query: \
1827 loc={:?} offset={} bytes={} lf_cnt={:?} query_start={} query_len={}",
1828 leaf.location,
1829 leaf.offset,
1830 leaf.bytes,
1831 leaf.line_feed_cnt,
1832 start,
1833 len
1834 );
1835 None
1836 }
1837 },
1838 )
1839 }
1840
1841 pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
1843 self.piece_tree
1844 .offset_to_position(offset, &self.buffers)
1845 .map(|(line, column)| Position { line, column })
1846 }
1847
    /// Convert a line/column [`Position`] into an absolute byte offset.
    pub fn position_to_offset(&self, position: Position) -> usize {
        self.piece_tree
            .position_to_offset(position.line, position.column, &self.buffers)
    }
1853
1854 pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
1856 if text.is_empty() {
1857 return self.piece_tree.cursor_at_offset(offset);
1858 }
1859
1860 self.mark_content_modified();
1862
1863 let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
1865
1866 let (buffer_location, buffer_offset, text_len) =
1868 if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
1869 append_info
1870 } else {
1871 let buffer_id = self.next_buffer_id;
1873 self.next_buffer_id += 1;
1874 let buffer = StringBuffer::new(buffer_id, text.clone());
1875 self.buffers.push(buffer);
1876 (BufferLocation::Added(buffer_id), 0, text.len())
1877 };
1878
1879 if self.line_feeds_scanned {
1882 self.ensure_chunk_loaded_at(offset);
1883 }
1884
1885 self.piece_tree.insert(
1887 offset,
1888 buffer_location,
1889 buffer_offset,
1890 text_len,
1891 line_feed_cnt,
1892 &self.buffers,
1893 )
1894 }
1895
    /// If the insertion at `offset` directly follows the logical end of an
    /// add buffer's last piece, append `text` to that buffer instead of
    /// allocating a new one.
    ///
    /// Returns `(location, offset_in_buffer, len)` for the appended bytes,
    /// or `None` when the append optimization does not apply.
    fn try_append_to_existing_buffer(
        &mut self,
        offset: usize,
        text: &[u8],
    ) -> Option<(BufferLocation, usize, usize)> {
        if text.is_empty() || offset == 0 {
            return None;
        }

        // Piece containing the byte just before the insertion point.
        let piece_info = self.piece_tree.find_by_offset(offset - 1)?;

        // The insertion must land exactly at the end of that piece.
        let offset_in_piece = piece_info.offset_in_piece?;
        if offset_in_piece + 1 != piece_info.bytes {
            return None; }

        // Only add buffers are appendable.
        if !matches!(piece_info.location, BufferLocation::Added(_)) {
            return None;
        }

        let buffer_id = piece_info.location.buffer_id();
        let buffer = self.buffers.get_mut(buffer_id)?;

        let buffer_len = buffer.get_data()?.len();

        // The piece must also end at the physical end of the buffer, so the
        // appended bytes stay contiguous with it.
        if piece_info.offset + piece_info.bytes != buffer_len {
            return None;
        }

        let append_offset = buffer.append(text);

        Some((piece_info.location, append_offset, text.len()))
    }
1941
    /// Insert a UTF-8 string at `offset` (convenience wrapper over
    /// [`Self::insert_bytes`]; the returned cursor is discarded).
    pub fn insert(&mut self, offset: usize, text: &str) {
        self.insert_bytes(offset, text.as_bytes().to_vec());
    }
1946
1947 pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
1950 if text.is_empty() {
1951 let offset = self.position_to_offset(position);
1952 return self.piece_tree.cursor_at_offset(offset);
1953 }
1954
1955 self.mark_content_modified();
1956
1957 let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1959
1960 let buffer_id = self.next_buffer_id;
1962 self.next_buffer_id += 1;
1963 let buffer = StringBuffer::new(buffer_id, text.clone());
1964 self.buffers.push(buffer);
1965
1966 self.piece_tree.insert_at_position(
1968 position.line,
1969 position.column,
1970 BufferLocation::Added(buffer_id),
1971 0,
1972 text.len(),
1973 line_feed_cnt,
1974 &self.buffers,
1975 )
1976 }
1977
    /// Delete `bytes` bytes starting at `offset`. Zero-length or
    /// past-the-end requests are ignored.
    pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
        if bytes == 0 || offset >= self.total_bytes() {
            return;
        }

        // Once line feeds are tracked, ensure chunks backing both ends of
        // the range are resident — presumably so the tree's line accounting
        // stays exact across the delete; confirm against PieceTree::delete.
        if self.line_feeds_scanned {
            self.ensure_chunk_loaded_at(offset);
            let end = (offset + bytes).min(self.total_bytes());
            if end > offset {
                self.ensure_chunk_loaded_at(end.saturating_sub(1));
            }
        }

        self.piece_tree.delete(offset, bytes, &self.buffers);

        self.mark_content_modified();
    }
1999
2000 pub fn delete(&mut self, range: Range<usize>) {
2002 if range.end > range.start {
2003 self.delete_bytes(range.start, range.end - range.start);
2004 }
2005 }
2006
    /// Delete everything between two line/column positions.
    ///
    /// NOTE(review): unlike `delete_bytes`, this does not pre-load chunks at
    /// the range boundaries — presumably the position-based delete path
    /// handles unloaded pieces itself; confirm against `PieceTree`.
    pub fn delete_range(&mut self, start: Position, end: Position) {
        self.piece_tree.delete_position_range(
            start.line,
            start.column,
            end.line,
            end.column,
            &self.buffers,
        );
        self.mark_content_modified();
    }
2020
2021 pub fn replace_content(&mut self, new_content: &str) {
2028 let bytes = new_content.len();
2029 let content_bytes = new_content.as_bytes().to_vec();
2030
2031 let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();
2033
2034 let buffer_id = self.next_buffer_id;
2036 self.next_buffer_id += 1;
2037 let buffer = StringBuffer::new(buffer_id, content_bytes);
2038 self.buffers.push(buffer);
2039
2040 if bytes > 0 {
2042 self.piece_tree = PieceTree::new(
2043 BufferLocation::Added(buffer_id),
2044 0,
2045 bytes,
2046 Some(line_feed_cnt),
2047 );
2048 } else {
2049 self.piece_tree = PieceTree::empty();
2050 }
2051
2052 self.mark_content_modified();
2053 }
2054
    /// Restore tree, buffers, and id counter from a snapshot (undo/redo
    /// support) and mark the content modified.
    pub fn restore_buffer_state(&mut self, snapshot: &BufferSnapshot) {
        self.piece_tree = snapshot.piece_tree.clone();
        self.buffers = snapshot.buffers.clone();
        self.next_buffer_id = snapshot.next_buffer_id;
        self.mark_content_modified();
    }
2066
    /// Capture the current tree, buffers, and id counter in a shareable
    /// snapshot (counterpart of [`Self::restore_buffer_state`]).
    pub fn snapshot_buffer_state(&self) -> Arc<BufferSnapshot> {
        Arc::new(BufferSnapshot {
            piece_tree: self.piece_tree.clone(),
            buffers: self.buffers.clone(),
            next_buffer_id: self.next_buffer_id,
        })
    }
2079
    /// Apply many `(offset, delete_len, insert_text)` edits in one tree
    /// pass; returns the byte delta reported by the tree.
    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
        // Pre-allocate one add buffer per non-empty insertion, in edit
        // order; the closure below hands them back in the same order.
        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();

        for (_, _, text) in edits {
            if !text.is_empty() {
                let buffer_id = self.next_buffer_id;
                self.next_buffer_id += 1;
                let content = text.as_bytes().to_vec();
                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
                let bytes = content.len();
                let buffer = StringBuffer::new(buffer_id, content);
                self.buffers.push(buffer);
                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
            }
        }

        // The tree calls back once per non-empty insertion; `idx` walks the
        // pre-built info in lockstep with those callbacks.
        let mut idx = 0;
        let delta = self
            .piece_tree
            .apply_bulk_edits(edits, &self.buffers, |_text| {
                let info = buffer_info[idx];
                idx += 1;
                info
            });

        self.mark_content_modified();
        delta
    }
2116
    /// Read up to `bytes` bytes starting at document offset `offset` from
    /// already-loaded buffers. NOTE: the second parameter is a byte COUNT,
    /// not an exclusive end offset.
    ///
    /// Pieces whose buffer data is unavailable abort with `None`; pieces
    /// whose computed range exceeds their buffer are silently skipped.
    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
        if bytes == 0 {
            return Some(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = offset + bytes;
        let mut collected = 0;

        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
            let buffer_id = piece_view.location.buffer_id();
            if let Some(buffer) = self.buffers.get(buffer_id) {
                // Intersect the requested range with this piece's doc span.
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                let read_start = offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    // Translate doc coordinates into buffer coordinates.
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    let data = buffer.get_data()?;

                    if buffer_end <= data.len() {
                        result.extend_from_slice(&data[buffer_start..buffer_end]);
                        collected += bytes_to_read;

                        // Stop as soon as the requested count is satisfied.
                        if collected >= bytes {
                            break;
                        }
                    }
                }
            }
        }

        Some(result)
    }
2167
    /// Read `bytes` bytes starting at `offset`, loading any unloaded chunks
    /// on demand (hence `&mut self`). The end is clamped to the document
    /// length.
    ///
    /// When a chunk load splits pieces, the piece iterator is invalidated;
    /// the outer loop then restarts iteration from `current_offset`.
    ///
    /// # Errors
    /// Fails if a buffer is missing/unreadable, a piece's range exceeds its
    /// buffer, or an iteration makes no progress (internal inconsistency).
    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
        let _span = tracing::info_span!("get_text_range_mut", offset, bytes).entered();
        if bytes == 0 {
            return Ok(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = (offset + bytes).min(self.len());
        let mut current_offset = offset;
        let mut iteration_count = 0u32;

        while current_offset < end_offset {
            iteration_count += 1;
            let mut made_progress = false;
            let mut restarted_iteration = false;

            for piece_view in self
                .piece_tree
                .iter_pieces_in_range(current_offset, end_offset)
            {
                let buffer_id = piece_view.location.buffer_id();

                // Missing buffers report "loaded" here; the read below will
                // surface the error with context.
                let needs_loading = self
                    .buffers
                    .get(buffer_id)
                    .map(|b| !b.is_loaded())
                    .unwrap_or(false);

                // A chunk split restructures the tree: abandon this piece
                // iteration and restart from current_offset.
                if needs_loading && self.chunk_split_and_load(&piece_view, current_offset)? {
                    restarted_iteration = true;
                    break;
                }

                // Intersect the remaining range with this piece's doc span.
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                let read_start = current_offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
                    let data = buffer
                        .get_data()
                        .context("Buffer data unavailable after load")?;

                    anyhow::ensure!(
                        buffer_end <= data.len(),
                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
                        buffer_start,
                        buffer_end,
                        data.len()
                    );

                    result.extend_from_slice(&data[buffer_start..buffer_end]);
                    current_offset = read_end;
                    made_progress = true;
                }
            }

            // No bytes read and no restart requested: the loop would spin
            // forever, so bail with diagnostics.
            if !made_progress && !restarted_iteration {
                tracing::error!(
                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
                tracing::error!(
                    "Piece tree stats: {} total bytes",
                    self.piece_tree.stats().total_bytes
                );
                anyhow::bail!(
                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
            }
        }

        if iteration_count > 1 {
            tracing::info!(
                iteration_count,
                result_len = result.len(),
                "get_text_range_mut: completed with multiple iterations"
            );
        }

        Ok(result)
    }
2280
2281 pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
2294 let _span = tracing::info_span!("prepare_viewport", start_offset, line_count).entered();
2295 let estimated_bytes = line_count.saturating_mul(200);
2298
2299 let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
2301 let bytes_to_load = estimated_bytes.min(remaining_bytes);
2302 tracing::trace!(
2303 bytes_to_load,
2304 total_bytes = self.total_bytes(),
2305 "prepare_viewport loading"
2306 );
2307
2308 self.get_text_range_mut(start_offset, bytes_to_load)?;
2311
2312 Ok(())
2313 }
2314
    /// Load the data behind an unloaded piece, splitting off a bounded,
    /// aligned chunk for very large buffers so one read never loads more
    /// than roughly `LOAD_CHUNK_SIZE` bytes.
    ///
    /// Returns `Ok(true)` when the tree was restructured (caller must
    /// restart its piece iteration) and `Ok(false)` when the whole buffer
    /// was simply loaded in place.
    ///
    /// # Errors
    /// Fails if the buffer is missing, the chunk buffer cannot be created,
    /// or the underlying load fails.
    fn chunk_split_and_load(
        &mut self,
        piece_view: &PieceView,
        current_offset: usize,
    ) -> Result<bool> {
        let buffer_id = piece_view.location.buffer_id();

        // Chunking is needed when the piece itself is large, or when the
        // backing buffer holds more unloaded bytes than this piece covers
        // (loading it whole would pull in unrelated data).
        let buffer_bytes = self
            .buffers
            .get(buffer_id)
            .and_then(|b| b.unloaded_bytes())
            .unwrap_or(0);
        let needs_chunk_split =
            piece_view.bytes > LOAD_CHUNK_SIZE || buffer_bytes > piece_view.bytes;

        tracing::info!(
            buffer_id,
            piece_bytes = piece_view.bytes,
            buffer_bytes,
            needs_chunk_split,
            piece_doc_offset = piece_view.doc_offset,
            current_offset,
            "chunk_split_and_load: loading unloaded piece"
        );

        // Small case: load the whole buffer in place; no restructuring.
        if !needs_chunk_split {
            let _span = tracing::info_span!(
                "load_small_buffer",
                piece_bytes = piece_view.bytes,
                buffer_id,
            )
            .entered();
            self.buffers
                .get_mut(buffer_id)
                .context("Buffer not found")?
                .load(&*self.fs)
                .context("Failed to load buffer")?;
            return Ok(false);
        }

        let _span = tracing::info_span!(
            "chunk_split_and_load",
            piece_bytes = piece_view.bytes,
            buffer_id,
        )
        .entered();

        let piece_start_in_doc = piece_view.doc_offset;
        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

        // Pick the chunk (in buffer coordinates): the whole piece when it
        // fits, otherwise a CHUNK_ALIGNMENT-aligned window around the
        // requested offset, clamped to the piece end.
        let (chunk_start_in_buffer, chunk_bytes) = if piece_view.bytes <= LOAD_CHUNK_SIZE {
            (piece_view.buffer_offset, piece_view.bytes)
        } else {
            let start =
                (piece_view.buffer_offset + offset_in_piece) / CHUNK_ALIGNMENT * CHUNK_ALIGNMENT;
            let bytes = LOAD_CHUNK_SIZE
                .min((piece_view.buffer_offset + piece_view.bytes).saturating_sub(start));
            (start, bytes)
        };

        // Split the piece at the chunk boundaries (doc coordinates) so the
        // chunk occupies exactly one piece.
        let chunk_start_offset_in_piece =
            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
        let split_end_in_doc = split_start_in_doc + chunk_bytes;

        if chunk_start_offset_in_piece > 0 {
            self.piece_tree
                .split_at_offset(split_start_in_doc, &self.buffers);
        }
        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
            self.piece_tree
                .split_at_offset(split_end_in_doc, &self.buffers);
        }

        // Materialize the chunk as its own buffer and point the piece at it.
        let chunk_buffer = self
            .buffers
            .get(buffer_id)
            .context("Buffer not found")?
            .create_chunk_buffer(self.next_buffer_id, chunk_start_in_buffer, chunk_bytes)
            .context("Failed to create chunk buffer")?;

        self.next_buffer_id += 1;
        let new_buffer_id = chunk_buffer.id;
        self.buffers.push(chunk_buffer);

        self.piece_tree.replace_buffer_reference(
            buffer_id,
            piece_view.buffer_offset + chunk_start_offset_in_piece,
            chunk_bytes,
            BufferLocation::Added(new_buffer_id),
        );

        self.buffers
            .get_mut(new_buffer_id)
            .context("Chunk buffer not found")?
            .load(&*self.fs)
            .context("Failed to load chunk")?;

        // Splits may have produced leaves without line-feed counts; scan
        // them now so line accounting stays complete.
        if self.line_feeds_scanned {
            let leaves = self.piece_tree.get_leaves();
            let mut fixups: Vec<(usize, usize)> = Vec::new();
            for (idx, leaf) in leaves.iter().enumerate() {
                if leaf.line_feed_cnt.is_none() {
                    if let Ok(count) = self.scan_leaf(leaf) {
                        fixups.push((idx, count));
                    }
                }
            }
            if !fixups.is_empty() {
                self.piece_tree.update_leaf_line_feeds_path_copy(&fixups);
            }
        }

        // Keep the saved snapshot in sync: content didn't change, only the
        // piece structure did, so either adopt the new root (unmodified) or
        // rewrite the snapshot's affected leaves (modified).
        if !self.modified {
            self.saved_root = self.piece_tree.root();
        } else {
            self.apply_chunk_load_to_saved_root(
                buffer_id,
                chunk_start_in_buffer,
                chunk_bytes,
                new_buffer_id,
            );
        }

        Ok(true)
    }
2471
    /// Entire document as raw bytes, or `None` if any buffer is unloaded.
    pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
        self.get_text_range(0, self.total_bytes())
    }
2478
2479 pub(crate) fn get_all_text_string(&self) -> Option<String> {
2483 self.get_all_text()
2484 .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
2485 }
2486
2487 pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
2492 self.get_text_range(range.start, range.end.saturating_sub(range.start))
2493 .unwrap_or_default()
2494 }
2495
    /// Entire document as a `String`, or `None` if data is unavailable.
    ///
    /// NOTE(review): an inherent `to_string` shadows the `Display`
    /// convention (clippy `inherent_to_string`); kept as-is because callers
    /// rely on the `Option` return.
    pub fn to_string(&self) -> Option<String> {
        self.get_all_text_string()
    }
2501
    /// Document length in bytes (alias for [`Self::total_bytes`]).
    pub fn len(&self) -> usize {
        self.total_bytes()
    }
2506
    /// `true` when the document contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.total_bytes() == 0
    }
2511
    /// Path of the file backing this buffer, if any.
    pub fn file_path(&self) -> Option<&Path> {
        self.file_path.as_deref()
    }
2516
    /// Associate the buffer with a (new) file path.
    pub fn rename_file_path(&mut self, path: PathBuf) {
        self.file_path = Some(path);
    }
2521
    /// Detach the buffer from its file path (e.g. after the file is gone).
    pub fn clear_file_path(&mut self) {
        self.file_path = None;
    }
2528
    /// Extend the document with bytes that were appended to `source_path`
    /// on disk (tail-follow / streaming). No-op if the file did not grow.
    ///
    /// The new tail `[old_size, new_size)` is registered as an unloaded
    /// buffer and spliced onto the end of the tree with an unknown
    /// line-feed count (scanned lazily later).
    pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
        let old_size = self.total_bytes();
        if new_size <= old_size {
            return;
        }

        let additional_bytes = new_size - old_size;

        let buffer_id = self.next_buffer_id;
        self.next_buffer_id += 1;

        // Unloaded buffer mapping file bytes starting at old_size.
        let new_buffer = StringBuffer::new_unloaded(
            buffer_id,
            source_path.to_path_buf(),
            old_size,
            additional_bytes,
        );
        self.buffers.push(new_buffer);

        // NOTE(review): the location is `Stored`, not `Added` — presumably
        // so this tail is treated as original file content by diff/recovery
        // paths; confirm against those consumers.
        self.piece_tree.insert(
            old_size,
            BufferLocation::Stored(buffer_id),
            0,
            additional_bytes,
            None, // line-feed count unknown until scanned
            &self.buffers,
        );
    }
2562
    /// Whether the buffer holds unsaved modifications.
    pub fn is_modified(&self) -> bool {
        self.modified
    }
2567
    /// Clear the modified flag (does not touch the saved snapshot).
    pub fn clear_modified(&mut self) {
        self.modified = false;
    }
2572
    /// Force the modified flag to a specific value.
    pub fn set_modified(&mut self, modified: bool) {
        self.modified = modified;
    }
2578
    /// Whether crash-recovery data is still pending for this buffer.
    pub fn is_recovery_pending(&self) -> bool {
        self.recovery_pending
    }
2583
    /// Set or clear the recovery-pending flag.
    pub fn set_recovery_pending(&mut self, pending: bool) {
        self.recovery_pending = pending;
    }
2588
2589 fn ensure_chunk_loaded_at(&mut self, offset: usize) {
2595 if let Some(piece_info) = self.piece_tree.find_by_offset(offset) {
2596 let buffer_id = piece_info.location.buffer_id();
2597 if let Some(buffer) = self.buffers.get_mut(buffer_id) {
2598 if !buffer.is_loaded() {
2599 let buf_bytes = buffer.unloaded_bytes().unwrap_or(0);
2600 tracing::info!(
2601 "ensure_chunk_loaded_at: loading buffer {} ({} bytes) for offset {}",
2602 buffer_id,
2603 buf_bytes,
2604 offset
2605 );
2606 if let Err(e) = buffer.load(&*self.fs) {
2607 tracing::warn!("Failed to load chunk at offset {offset}: {e}");
2608 }
2609 }
2610 }
2611 }
2612 }
2613
    /// Whether this buffer was opened in large-file (lazy-loading) mode.
    pub fn is_large_file(&self) -> bool {
        self.large_file
    }
2618
    /// Whether the full line-feed scan has completed.
    pub fn has_line_feed_scan(&self) -> bool {
        self.line_feeds_scanned
    }
2624
    /// Snapshot of the tree's leaves, in document order.
    pub fn piece_tree_leaves(&self) -> Vec<crate::model::piece_tree::LeafData> {
        self.piece_tree.get_leaves()
    }
2629
2630 pub fn prepare_line_scan(&mut self) -> (Vec<LineScanChunk>, usize) {
2639 self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
2641
2642 let leaves = self.piece_tree.get_leaves();
2643 let total_bytes: usize = leaves.iter().map(|l| l.bytes).sum();
2644 let mut chunks = Vec::new();
2645
2646 for (idx, leaf) in leaves.iter().enumerate() {
2647 chunks.push(LineScanChunk {
2648 leaf_index: idx,
2649 byte_len: leaf.bytes,
2650 already_known: leaf.line_feed_cnt.is_some(),
2651 });
2652 }
2653
2654 (chunks, total_bytes)
2655 }
2656
    /// Count line feeds (`\n`) within one leaf: directly from memory for a
    /// loaded buffer, or by streaming the byte range from disk for an
    /// unloaded one.
    ///
    /// # Errors
    /// Fails if the leaf's buffer is missing or the on-disk read fails.
    pub fn scan_leaf(&self, leaf: &crate::model::piece_tree::LeafData) -> std::io::Result<usize> {
        let buffer_id = leaf.location.buffer_id();
        let buffer = self
            .buffers
            .get(buffer_id)
            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::NotFound, "buffer not found"))?;

        let count = match &buffer.data {
            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
                // Clamp to buffer length; a leaf should never exceed it.
                let end = (leaf.offset + leaf.bytes).min(data.len());
                data[leaf.offset..end]
                    .iter()
                    .filter(|&&b| b == b'\n')
                    .count()
            }
            crate::model::piece_tree::BufferData::Unloaded {
                file_path,
                file_offset,
                ..
            } => {
                // Leaf offset is relative to the buffer's slice of the file.
                let read_offset = *file_offset as u64 + leaf.offset as u64;
                self.fs
                    .count_line_feeds_in_range(file_path, read_offset, leaf.bytes)?
            }
        };
        Ok(count)
    }
2688
2689 pub fn leaf_io_params(
2694 &self,
2695 leaf: &crate::model::piece_tree::LeafData,
2696 ) -> Option<(std::path::PathBuf, u64, usize)> {
2697 let buffer_id = leaf.location.buffer_id();
2698 let buffer = self.buffers.get(buffer_id)?;
2699 match &buffer.data {
2700 crate::model::piece_tree::BufferData::Loaded { .. } => None,
2701 crate::model::piece_tree::BufferData::Unloaded {
2702 file_path,
2703 file_offset,
2704 ..
2705 } => {
2706 let read_offset = *file_offset as u64 + leaf.offset as u64;
2707 Some((file_path.clone(), read_offset, leaf.bytes))
2708 }
2709 }
2710 }
2711
    /// All backing buffers, indexable by buffer id.
    pub fn buffer_slice(&self) -> &[StringBuffer] {
        &self.buffers
    }
2716
    /// Apply `(leaf_index, line_feed_count)` results from a line scan and
    /// mark the scan complete.
    pub fn apply_scan_updates(&mut self, updates: &[(usize, usize)]) {
        self.piece_tree.update_leaf_line_feeds(updates);
        self.line_feeds_scanned = true;
    }
2722
    /// Rebuild the tree on top of a pristine one-piece representation of
    /// the on-disk file, replaying the user's edits, and install that
    /// pristine tree (with scan results) as the saved snapshot.
    ///
    /// Without a recorded file size there is nothing to rebuild against;
    /// the scan results are applied to the current tree instead.
    pub fn rebuild_with_pristine_saved_root(&mut self, scan_updates: &[(usize, usize)]) {
        let file_size = match self.saved_file_size {
            Some(s) => s,
            None => {
                // No baseline: just record the line-feed counts.
                self.apply_scan_updates(scan_updates);
                return;
            }
        };

        let total = self.total_bytes();

        // Reconstruct the edit script relative to the original file:
        // gaps between consecutive stored ranges are deletions; added
        // pieces are insertions positioned by how many stored bytes
        // precede them in the document.
        let mut deletions: Vec<(usize, usize)> = Vec::new();
        let mut insertions: Vec<(usize, BufferLocation, usize, usize, Option<usize>)> = Vec::new();
        let mut orig_cursor: usize = 0;
        let mut stored_bytes_in_doc: usize = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    if piece.buffer_offset > orig_cursor {
                        deletions.push((orig_cursor, piece.buffer_offset - orig_cursor));
                    }
                    orig_cursor = piece.buffer_offset + piece.bytes;
                    stored_bytes_in_doc += piece.bytes;
                }
                BufferLocation::Added(id) => {
                    // Add buffers carrying a stored_file_offset represent
                    // original file content (e.g. loaded chunks) and are
                    // treated like stored pieces.
                    if let Some(file_off) = self.buffers.get(id).and_then(|b| b.stored_file_offset)
                    {
                        if file_off > orig_cursor {
                            deletions.push((orig_cursor, file_off - orig_cursor));
                        }
                        orig_cursor = file_off + piece.bytes;
                        stored_bytes_in_doc += piece.bytes;
                    } else {
                        insertions.push((
                            stored_bytes_in_doc,
                            piece.location,
                            piece.buffer_offset,
                            piece.bytes,
                            piece.line_feed_cnt,
                        ));
                    }
                }
            }
        }
        // Anything past the last stored piece was deleted from the file.
        if orig_cursor < file_size {
            deletions.push((orig_cursor, file_size - orig_cursor));
        }

        // Pristine tree = the original file as one Stored(0) run, chunked
        // and annotated with the scan's line-feed counts.
        let mut pristine = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        pristine.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
        pristine.update_leaf_line_feeds(scan_updates);

        self.saved_root = pristine.root();

        if deletions.is_empty() && insertions.is_empty() {
            self.piece_tree = pristine;
            self.line_feeds_scanned = true;
            return;
        }

        let mut tree = pristine;

        // Apply deletions back-to-front so earlier offsets stay valid.
        deletions.sort_by(|a, b| b.0.cmp(&a.0));
        for &(offset, len) in &deletions {
            tree.delete(offset, len, &self.buffers);
        }

        // Insertions were recorded in document order; each one shifts
        // subsequent insertion points by its own length.
        let mut insert_delta: usize = 0;
        for &(offset, location, buf_offset, bytes, lf_cnt) in &insertions {
            tree.insert(
                offset + insert_delta,
                location,
                buf_offset,
                bytes,
                lf_cnt,
                &self.buffers,
            );
            insert_delta += bytes;
        }

        // Splits during replay may have produced leaves without line-feed
        // counts; scan them so the rebuilt tree is fully annotated.
        let leaves = tree.get_leaves();
        let mut fixups: Vec<(usize, usize)> = Vec::new();
        for (idx, leaf) in leaves.iter().enumerate() {
            if leaf.line_feed_cnt.is_none() {
                if let Ok(count) = self.scan_leaf(leaf) {
                    fixups.push((idx, count));
                }
            }
        }
        if !fixups.is_empty() {
            tree.update_leaf_line_feeds_path_copy(&fixups);
        }

        self.piece_tree = tree;
        self.line_feeds_scanned = true;
    }
2848
    /// Byte offset where `target_line` (0-based) begins, or `None` if the
    /// line cannot be located.
    ///
    /// Finds the piece containing the line boundary, reads that piece's
    /// bytes (from memory or disk), and scans for the newline that ends the
    /// preceding line. Falls back to the piece end if the newline isn't
    /// found within the piece.
    ///
    /// NOTE(review): takes `&mut self` but performs no visible mutation —
    /// possibly reserved for future on-demand loading; confirm before
    /// relaxing to `&self`.
    pub fn resolve_line_byte_offset(&mut self, target_line: usize) -> Option<usize> {
        if target_line == 0 {
            return Some(0);
        }

        // Piece containing the target line, plus how many lines precede it.
        let (doc_offset, buffer_id, piece_offset, piece_bytes, lines_before) =
            self.piece_tree.piece_info_for_line(target_line)?;

        let lines_to_skip = target_line - lines_before;

        let buffer = self.buffers.get(buffer_id)?;
        let piece_data: Vec<u8> = match &buffer.data {
            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
                let end = (piece_offset + piece_bytes).min(data.len());
                data[piece_offset..end].to_vec()
            }
            crate::model::piece_tree::BufferData::Unloaded {
                file_path,
                file_offset,
                ..
            } => {
                let read_offset = *file_offset as u64 + piece_offset as u64;
                self.fs
                    .read_range(file_path, read_offset, piece_bytes)
                    .ok()?
            }
        };

        // The line starts one byte past its `lines_to_skip`-th newline.
        let mut newlines_found = 0;
        for (i, &byte) in piece_data.iter().enumerate() {
            if byte == b'\n' {
                newlines_found += 1;
                if newlines_found == lines_to_skip {
                    return Some(doc_offset + i + 1);
                }
            }
        }

        // Newline not found in this piece: return the piece end.
        Some(doc_offset + piece_bytes)
    }
2901
    /// Size of the file at the time of the saved snapshot, if recorded.
    pub fn original_file_size(&self) -> Option<usize> {
        self.saved_file_size
    }
2910
    /// Collect crash-recovery data: every added (non-original) piece's
    /// bytes, each tagged with the count of STORED bytes that precede it in
    /// the document (i.e. its position relative to the original file).
    ///
    /// Pieces whose buffer data is unavailable or whose range exceeds the
    /// buffer are skipped (best-effort).
    pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
        use crate::model::piece_tree::BufferLocation;

        let mut chunks = Vec::new();
        let total = self.total_bytes();

        // Running count of original-file bytes seen so far.
        let mut stored_bytes_before = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    stored_bytes_before += piece.bytes;
                }
                BufferLocation::Added(buffer_id) => {
                    if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
                        if let Some(data) = buffer.get_data() {
                            let start = piece.buffer_offset;
                            let end = start + piece.bytes;
                            if end <= data.len() {
                                chunks.push((stored_bytes_before, data[start..end].to_vec()));
                            }
                        }
                    }
                }
            }
        }

        chunks
    }
2959
    /// Whether the content was detected as binary (non-text).
    pub fn is_binary(&self) -> bool {
        self.is_binary
    }
2964
    /// The line-ending convention currently in effect for this buffer.
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }
2969
    /// Changes the buffer's line-ending convention and marks the content
    /// as modified.
    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.mark_content_modified();
    }
2978
    /// Sets the line ending as the buffer's default: both the active and
    /// the "original" convention are updated, and — unlike
    /// `set_line_ending` — the buffer is NOT marked modified.
    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.original_line_ending = line_ending;
    }
2987
    /// The text encoding currently in effect for this buffer.
    pub fn encoding(&self) -> Encoding {
        self.encoding
    }
2992
    /// Changes the buffer's encoding and marks the content as modified.
    pub fn set_encoding(&mut self, encoding: Encoding) {
        self.encoding = encoding;
        self.mark_content_modified();
    }
3001
    /// Sets the encoding as the buffer's default: both the active and the
    /// "original" encoding are updated, and — unlike `set_encoding` — the
    /// buffer is NOT marked modified.
    pub fn set_default_encoding(&mut self, encoding: Encoding) {
        self.encoding = encoding;
        self.original_encoding = encoding;
    }
3010
3011 pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
3016 let check_len = bytes.len().min(8 * 1024);
3018 let sample = &bytes[..check_len];
3019
3020 let mut crlf_count = 0;
3021 let mut lf_only_count = 0;
3022 let mut cr_only_count = 0;
3023
3024 let mut i = 0;
3025 while i < sample.len() {
3026 if sample[i] == b'\r' {
3027 if i + 1 < sample.len() && sample[i + 1] == b'\n' {
3029 crlf_count += 1;
3030 i += 2; continue;
3032 } else {
3033 cr_only_count += 1;
3035 }
3036 } else if sample[i] == b'\n' {
3037 lf_only_count += 1;
3039 }
3040 i += 1;
3041 }
3042
3043 if crlf_count > lf_only_count && crlf_count > cr_only_count {
3045 LineEnding::CRLF
3046 } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
3047 LineEnding::CR
3048 } else {
3049 LineEnding::LF
3051 }
3052 }
3053
    /// Detects the text encoding of `bytes`; thin wrapper over
    /// `encoding::detect_encoding`.
    pub fn detect_encoding(bytes: &[u8]) -> Encoding {
        encoding::detect_encoding(bytes)
    }
3061
    /// Detects the encoding of `bytes` and whether the content looks
    /// binary; thin wrapper over `encoding::detect_encoding_or_binary`.
    pub fn detect_encoding_or_binary(bytes: &[u8]) -> (Encoding, bool) {
        encoding::detect_encoding_or_binary(bytes)
    }
3072
    /// Detects the encoding of `bytes` and returns it together with the
    /// converted content (presumably to UTF-8, mirroring
    /// `convert_to_encoding` — see `encoding::detect_and_convert`).
    pub fn detect_and_convert_encoding(bytes: &[u8]) -> (Encoding, Vec<u8>) {
        encoding::detect_and_convert(bytes)
    }
3080
    /// Re-encodes UTF-8 text into `target_encoding`'s byte representation;
    /// thin wrapper over `encoding::convert_from_utf8`.
    pub fn convert_to_encoding(utf8_bytes: &[u8], target_encoding: Encoding) -> Vec<u8> {
        encoding::convert_from_utf8(utf8_bytes, target_encoding)
    }
3089
3090 #[allow(dead_code)] pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
3097 let mut normalized = Vec::with_capacity(bytes.len());
3098 let mut i = 0;
3099
3100 while i < bytes.len() {
3101 if bytes[i] == b'\r' {
3102 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3104 normalized.push(b'\n');
3106 i += 2; continue;
3108 } else {
3109 normalized.push(b'\n');
3111 }
3112 } else {
3113 normalized.push(bytes[i]);
3115 }
3116 i += 1;
3117 }
3118
3119 normalized
3120 }
3121
3122 fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
3127 let mut normalized = Vec::with_capacity(bytes.len());
3129 let mut i = 0;
3130 while i < bytes.len() {
3131 if bytes[i] == b'\r' {
3132 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3134 normalized.push(b'\n');
3136 i += 2;
3137 continue;
3138 } else {
3139 normalized.push(b'\n');
3141 }
3142 } else {
3143 normalized.push(bytes[i]);
3144 }
3145 i += 1;
3146 }
3147
3148 if target_ending == LineEnding::LF {
3150 return normalized;
3151 }
3152
3153 let replacement = target_ending.as_str().as_bytes();
3155 let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
3156
3157 for byte in normalized {
3158 if byte == b'\n' {
3159 result.extend_from_slice(replacement);
3160 } else {
3161 result.push(byte);
3162 }
3163 }
3164
3165 result
3166 }
3167
3168 pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
3170 let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
3171
3172 let bytes = if let Some(end_offset) = end {
3173 end_offset.saturating_sub(start)
3174 } else {
3175 self.total_bytes().saturating_sub(start)
3176 };
3177
3178 self.get_text_range(start, bytes)
3179 }
3180
3181 pub fn line_start_offset(&self, line: usize) -> Option<usize> {
3183 let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
3184 Some(start)
3185 }
3186
    /// Looks up the piece containing document offset `offset`, if any.
    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
        self.piece_tree.find_by_offset(offset)
    }
3191
    /// Structural statistics of the underlying piece tree.
    pub fn stats(&self) -> TreeStats {
        self.piece_tree.stats()
    }
3196
3197 pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
3201 if pattern.is_empty() {
3202 return None;
3203 }
3204
3205 let pattern_bytes = pattern.as_bytes();
3206 let buffer_len = self.len();
3207
3208 if start_pos < buffer_len {
3210 if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
3211 return Some(offset);
3212 }
3213 }
3214
3215 if start_pos > 0 {
3217 if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
3218 return Some(offset);
3219 }
3220 }
3221
3222 None
3223 }
3224
3225 pub fn find_next_in_range(
3229 &self,
3230 pattern: &str,
3231 start_pos: usize,
3232 range: Option<Range<usize>>,
3233 ) -> Option<usize> {
3234 if pattern.is_empty() {
3235 return None;
3236 }
3237
3238 if let Some(search_range) = range {
3239 let pattern_bytes = pattern.as_bytes();
3241 let search_start = start_pos.max(search_range.start);
3242 let search_end = search_range.end.min(self.len());
3243
3244 if search_start < search_end {
3245 self.find_pattern(search_start, search_end, pattern_bytes)
3246 } else {
3247 None
3248 }
3249 } else {
3250 self.find_next(pattern, start_pos)
3252 }
3253 }
3254
    /// Byte-level substring search over `[start, end)` using overlapping
    /// chunked reads, so the whole range never has to be materialized.
    ///
    /// Each chunk carries `pattern.len() - 1` bytes of overlap with its
    /// predecessor, guaranteeing a match that straddles a chunk boundary
    /// appears whole in some chunk.
    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
        if pattern.is_empty() || start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 65536;
        // Overlap of len-1 is the minimum that cannot split a match;
        // `.max(1)` keeps the chunker advancing for 1-byte patterns.
        let overlap = pattern.len().saturating_sub(1).max(1);

        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);

        for chunk in chunks {
            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
                let match_end = pos + pattern.len();
                // Matches ending inside the overlap prefix were already
                // visible in the previous chunk — skip them.
                // NOTE(review): if this first match is rejected, later
                // matches inside the SAME chunk are not examined until the
                // next chunk's overlap; confirm this cannot skip a match
                // beyond the overlap window.
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + pos;
                    // Discard a match that would run past the search window.
                    if absolute_pos + pattern.len() <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
3285
3286 fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
3288 if needle.is_empty() || needle.len() > haystack.len() {
3289 return None;
3290 }
3291
3292 (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
3293 }
3294
3295 pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
3297 let buffer_len = self.len();
3298
3299 if start_pos < buffer_len {
3301 if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
3302 return Some(offset);
3303 }
3304 }
3305
3306 if start_pos > 0 {
3308 if let Some(offset) = self.find_regex(0, start_pos, regex) {
3309 return Some(offset);
3310 }
3311 }
3312
3313 None
3314 }
3315
3316 pub fn find_next_regex_in_range(
3318 &self,
3319 regex: &Regex,
3320 start_pos: usize,
3321 range: Option<Range<usize>>,
3322 ) -> Option<usize> {
3323 if let Some(search_range) = range {
3324 let search_start = start_pos.max(search_range.start);
3325 let search_end = search_range.end.min(self.len());
3326
3327 if search_start < search_end {
3328 self.find_regex(search_start, search_end, regex)
3329 } else {
3330 None
3331 }
3332 } else {
3333 self.find_next_regex(regex, start_pos)
3334 }
3335 }
3336
    /// Regex search over `[start, end)` using overlapping chunked reads.
    ///
    /// Chunks are 1 MiB with a fixed 4 KiB overlap, so a match that
    /// straddles a chunk boundary is still found as long as it is shorter
    /// than the overlap (a longer straddling match can be missed — an
    /// inherent limit of fixed-overlap chunking).
    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
        if start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 1048576;
        const OVERLAP: usize = 4096;
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);

        for chunk in chunks {
            if let Some(mat) = regex.find(&chunk.buffer) {
                let match_end = mat.end();
                // Matches ending inside the overlap prefix were already
                // visible in the previous chunk — skip them.
                // NOTE(review): only the FIRST match per chunk is examined;
                // if it is rejected, later matches in the same chunk wait
                // for the next chunk's overlap — confirm none can be skipped.
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + mat.start();
                    let match_len = mat.end() - mat.start();
                    // Discard a match that would run past the search window.
                    if absolute_pos + match_len <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
3369
3370 pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
3372 if range.start >= self.len() {
3373 return false;
3374 }
3375
3376 let end = range.end.min(self.len());
3377 if end > range.start {
3378 self.delete_bytes(range.start, end - range.start);
3379 }
3380
3381 if !replacement.is_empty() {
3382 self.insert(range.start, replacement);
3383 }
3384
3385 true
3386 }
3387
3388 pub fn replace_next(
3390 &mut self,
3391 pattern: &str,
3392 replacement: &str,
3393 start_pos: usize,
3394 range: Option<Range<usize>>,
3395 ) -> Option<usize> {
3396 if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
3397 self.replace_range(pos..pos + pattern.len(), replacement);
3398 Some(pos)
3399 } else {
3400 None
3401 }
3402 }
3403
3404 pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
3406 if pattern.is_empty() {
3407 return 0;
3408 }
3409
3410 let mut count = 0;
3411 let mut pos = 0;
3412
3413 while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
3417 self.replace_range(found_pos..found_pos + pattern.len(), replacement);
3418 count += 1;
3419
3420 pos = found_pos + replacement.len();
3422
3423 if pos >= self.len() {
3425 break;
3426 }
3427 }
3428
3429 count
3430 }
3431
3432 pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
3434 let mut count = 0;
3435 let mut pos = 0;
3436
3437 while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
3438 let text = self
3440 .get_text_range_mut(found_pos, self.len() - found_pos)
3441 .context("Failed to read text for regex match")?;
3442
3443 if let Some(mat) = regex.find(&text) {
3444 self.replace_range(found_pos..found_pos + mat.len(), replacement);
3445 count += 1;
3446 pos = found_pos + replacement.len();
3447
3448 if pos >= self.len() {
3449 break;
3450 }
3451 } else {
3452 break;
3453 }
3454 }
3455
3456 Ok(count)
3457 }
3458
3459 pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
3463 self.offset_to_position(byte_pos)
3464 .map(|pos| (pos.line, pos.column))
3465 .unwrap_or_else(|| (byte_pos / 80, 0)) }
3467
3468 pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
3472 if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
3473 let line_len = if let Some(end_offset) = end {
3475 end_offset.saturating_sub(start)
3476 } else {
3477 self.total_bytes().saturating_sub(start)
3478 };
3479 let byte_offset = character.min(line_len);
3480 start + byte_offset
3481 } else {
3482 self.len()
3484 }
3485 }
3486
3487 pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
3490 let (line, column_bytes) = self
3491 .offset_to_position(byte_pos)
3492 .map(|pos| (pos.line, pos.column))
3493 .unwrap_or_else(|| (byte_pos / 80, 0)); if let Some(line_bytes) = self.get_line(line) {
3497 let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
3499 let text_str = String::from_utf8_lossy(text_before);
3500 let utf16_offset = text_str.encode_utf16().count();
3501 (line, utf16_offset)
3502 } else {
3503 (line, 0)
3504 }
3505 }
3506
3507 pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
3511 if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
3512 let line_len = if let Some(end_offset) = end {
3514 end_offset.saturating_sub(line_start)
3515 } else {
3516 self.total_bytes().saturating_sub(line_start)
3517 };
3518
3519 if line_len > 0 {
3520 let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
3522 return line_start;
3523 };
3524 let line_str = String::from_utf8_lossy(&line_bytes);
3525
3526 let mut utf16_count = 0;
3528 let mut byte_offset = 0;
3529
3530 for ch in line_str.chars() {
3531 if utf16_count >= utf16_offset {
3532 break;
3533 }
3534 utf16_count += ch.len_utf16();
3535 byte_offset += ch.len_utf8();
3536 }
3537
3538 line_start + byte_offset
3539 } else {
3540 line_start
3541 }
3542 } else {
3543 self.len()
3545 }
3546 }
3547
3548 pub fn prev_char_boundary(&self, pos: usize) -> usize {
3552 if pos == 0 {
3553 return 0;
3554 }
3555
3556 let start = pos.saturating_sub(4);
3558 let Some(bytes) = self.get_text_range(start, pos - start) else {
3559 return pos;
3561 };
3562
3563 for i in (0..bytes.len()).rev() {
3565 let byte = bytes[i];
3566 if (byte & 0b1100_0000) != 0b1000_0000 {
3568 return start + i;
3569 }
3570 }
3571
3572 pos.saturating_sub(1)
3574 }
3575
3576 pub fn next_char_boundary(&self, pos: usize) -> usize {
3578 let len = self.len();
3579 if pos >= len {
3580 return len;
3581 }
3582
3583 let end = (pos + 5).min(len);
3585 let Some(bytes) = self.get_text_range(pos, end - pos) else {
3586 return pos;
3588 };
3589
3590 for (i, &byte) in bytes.iter().enumerate().skip(1) {
3592 if (byte & 0b1100_0000) != 0b1000_0000 {
3594 return pos + i;
3595 }
3596 }
3597
3598 end
3600 }
3601
3602 #[inline]
3606 fn is_utf8_continuation_byte(byte: u8) -> bool {
3607 (byte & 0b1100_0000) == 0b1000_0000
3608 }
3609
3610 pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
3614 let len = self.len();
3615 if pos == 0 || pos >= len {
3616 return pos.min(len);
3617 }
3618
3619 let Some(bytes) = self.get_text_range(pos, 1) else {
3621 return pos;
3623 };
3624
3625 if !Self::is_utf8_continuation_byte(bytes[0]) {
3627 return pos;
3629 }
3630
3631 self.prev_char_boundary(pos)
3633 }
3634
    /// Byte offset of the previous grapheme-cluster boundary before `pos`.
    ///
    /// Reads up to a 32-byte window ending at `pos` (aligned back to a
    /// char boundary), runs the grapheme segmenter on it, and recurses
    /// into the previous window when the boundary falls exactly at the
    /// window's start (the cluster may extend further back). Any UTF-8
    /// decode failure degrades to `prev_char_boundary`.
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        // 32 bytes is a heuristic window; most grapheme clusters fit.
        let raw_start = pos.saturating_sub(32);
        let start = if raw_start == 0 {
            0
        } else {
            // Align the window start onto a char boundary so the window
            // decodes as valid UTF-8.
            self.prev_char_boundary(raw_start + 1)
        };

        let Some(bytes) = self.get_text_range(start, pos - start) else {
            // Unreadable window: fall back to plain char-boundary motion.
            return self.prev_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Decode failed mid-window; retry on the valid prefix, and
                // if nothing decodes, fall back to char-boundary motion.
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.prev_char_boundary(pos),
                }
            }
        };

        let rel_pos = pos - start;
        let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

        // Boundary at the window start may really lie further back —
        // recurse with an earlier window.
        if new_rel_pos == 0 && start > 0 {
            return self.prev_grapheme_boundary(start);
        }

        start + new_rel_pos
    }
3687
    /// Byte offset of the next grapheme-cluster boundary after `pos`
    /// (positions at or past the end map to `len`).
    ///
    /// Reads up to a 32-byte window starting at `pos` and asks the
    /// grapheme segmenter for the first boundary within it. Any UTF-8
    /// decode failure degrades to `next_char_boundary`.
    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
        let len = self.len();
        if pos >= len {
            return len;
        }

        // 32 bytes is a heuristic window; most grapheme clusters fit.
        // NOTE(review): a cluster longer than the window would be split —
        // confirm this is acceptable for extreme emoji ZWJ sequences.
        let end = (pos + 32).min(len);
        let Some(bytes) = self.get_text_range(pos, end - pos) else {
            // Unreadable window: fall back to plain char-boundary motion.
            return self.next_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Decode failed mid-window; retry on the valid prefix, and
                // if nothing decodes, fall back to char-boundary motion.
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.next_char_boundary(pos),
                }
            }
        };

        let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
        pos + new_rel_pos
    }
3726
3727 pub fn prev_word_boundary(&self, pos: usize) -> usize {
3729 if pos == 0 {
3730 return 0;
3731 }
3732
3733 let start = pos.saturating_sub(256).max(0);
3735 let Some(bytes) = self.get_text_range(start, pos - start) else {
3736 return pos;
3738 };
3739 let text = String::from_utf8_lossy(&bytes);
3740
3741 let mut found_word_char = false;
3742 let chars: Vec<char> = text.chars().collect();
3743
3744 for i in (0..chars.len()).rev() {
3745 let ch = chars[i];
3746 let is_word_char = ch.is_alphanumeric() || ch == '_';
3747
3748 if found_word_char && !is_word_char {
3749 let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
3752 return start + byte_offset;
3753 }
3754
3755 if is_word_char {
3756 found_word_char = true;
3757 }
3758 }
3759
3760 0
3761 }
3762
3763 pub fn next_word_boundary(&self, pos: usize) -> usize {
3765 let len = self.len();
3766 if pos >= len {
3767 return len;
3768 }
3769
3770 let end = (pos + 256).min(len);
3772 let Some(bytes) = self.get_text_range(pos, end - pos) else {
3773 return pos;
3775 };
3776 let text = String::from_utf8_lossy(&bytes);
3777
3778 let mut found_word_char = false;
3779 let mut byte_offset = 0;
3780
3781 for ch in text.chars() {
3782 let is_word_char = ch.is_alphanumeric() || ch == '_';
3783
3784 if found_word_char && !is_word_char {
3785 return pos + byte_offset;
3787 }
3788
3789 if is_word_char {
3790 found_word_char = true;
3791 }
3792
3793 byte_offset += ch.len_utf8();
3794 }
3795
3796 len
3797 }
3798
    /// Creates a [`LineIterator`] positioned at `byte_pos`.
    ///
    /// `estimated_line_length` is forwarded to the iterator (presumably a
    /// read-ahead sizing hint — see `LineIterator::new`).
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }
3810
    /// Creates an iterator yielding up to `max_lines` lines starting at
    /// `byte_pos`; fails if the iterator cannot be constructed.
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
3831
3832 pub fn get_line_number(&self, byte_offset: usize) -> usize {
3845 self.offset_to_position(byte_offset)
3846 .map(|pos| pos.line)
3847 .unwrap_or_else(|| {
3848 byte_offset / self.config.estimated_line_length
3850 })
3851 }
3852
    /// The configured average line length used for line-number estimates.
    pub fn estimated_line_length(&self) -> usize {
        self.config.estimated_line_length
    }
3857
    /// Appears to be a compatibility shim from an earlier line-cache
    /// design: line positions are now answered by the piece tree, so this
    /// simply resolves the line number for `start_byte`. `_line_count`
    /// is accepted but unused.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        self.get_line_number(start_byte)
    }
3896
    /// Line-cache-era alias for [`Self::line_start_offset`]; the piece
    /// tree itself now serves as the "cache".
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }
3901
    /// No-op: line data is derived from the piece tree, so there is no
    /// separate cache to invalidate (method presumably kept for API
    /// compatibility).
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
    }
3906
    /// No-op: insertions need no cache maintenance since line data lives
    /// in the piece tree (method presumably kept for API compatibility).
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
    }
3911
    /// No-op: deletions need no cache maintenance since line data lives
    /// in the piece tree (method presumably kept for API compatibility).
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
    }
3916
    /// No-op: there is no separate line cache to clear (method presumably
    /// kept for API compatibility).
    pub fn clear_line_cache(&mut self) {
    }
3921
    #[cfg(test)]
    /// Test-only constructor: builds a buffer from a string slice using
    /// the real standard filesystem.
    pub fn from_str_test(s: &str) -> Self {
        Self::from_bytes(
            s.as_bytes().to_vec(),
            std::sync::Arc::new(crate::model::filesystem::StdFileSystem),
        )
    }
3932
    #[cfg(test)]
    /// Test-only constructor: an empty buffer backed by the real
    /// standard filesystem.
    pub fn new_test() -> Self {
        Self::empty(std::sync::Arc::new(crate::model::filesystem::StdFileSystem))
    }
3938}
3939
3940pub type Buffer = TextBuffer;
3942
3943pub use crate::primitives::line_iterator::LineIterator;
3945
/// One chunk of document content yielded by [`OverlappingChunks`].
#[derive(Debug)]
pub struct ChunkInfo {
    /// The chunk's bytes, including any leading overlap carried over from
    /// the previous chunk.
    pub buffer: Vec<u8>,

    /// Absolute document offset of `buffer[0]`.
    pub absolute_pos: usize,

    /// Index into `buffer` where new (non-overlap) data begins; consumers
    /// use it to skip matches already reported from the previous chunk.
    pub valid_start: usize,
}
3963
/// Streaming iterator over a document range `[start, end)` that yields
/// fixed-size chunks, each sharing `overlap` trailing bytes with its
/// predecessor so patterns spanning a chunk boundary can still match.
pub struct OverlappingChunks<'a> {
    /// Pieces covering the search range, in document order.
    piece_iter: PieceRangeIter,
    /// Backing buffers the pieces index into.
    buffers: &'a [StringBuffer],

    /// Bytes of the current chunk (overlap prefix + new data).
    buffer: Vec<u8>,
    /// Absolute document offset of `buffer[0]`.
    buffer_absolute_pos: usize,

    /// Next document offset to read.
    current_pos: usize,
    /// Exclusive end of the search range.
    end_pos: usize,

    /// Target amount of new data per chunk.
    chunk_size: usize,
    /// Bytes carried over between consecutive chunks.
    overlap: usize,

    /// True until the first chunk has been filled.
    first_chunk: bool,

    /// Bytes of the piece currently being consumed, if any.
    current_piece_data: Option<Vec<u8>>,
    /// Read cursor within `current_piece_data`.
    current_piece_offset: usize,
}
4014
impl<'a> OverlappingChunks<'a> {
    /// Creates a chunk iterator over `[start, end)` of `text_buffer`,
    /// producing `chunk_size` bytes of new data per chunk with `overlap`
    /// bytes carried between consecutive chunks.
    pub fn new(
        text_buffer: &'a TextBuffer,
        start: usize,
        end: usize,
        chunk_size: usize,
        overlap: usize,
    ) -> Self {
        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);

        Self {
            piece_iter,
            buffers: &text_buffer.buffers,
            buffer: Vec::with_capacity(chunk_size + overlap),
            buffer_absolute_pos: start,
            current_pos: start,
            end_pos: end,
            chunk_size,
            overlap,
            first_chunk: true,
            current_piece_data: None,
            current_piece_offset: 0,
        }
    }

    /// Reads the next byte of the range, pulling in the next piece's data
    /// on demand. Returns `None` when the range is exhausted — or when a
    /// piece's data cannot be obtained (see NOTE below).
    fn read_byte(&mut self) -> Option<u8> {
        loop {
            // Fast path: serve from the piece data already copied out.
            if let Some(ref data) = self.current_piece_data {
                if self.current_piece_offset < data.len() {
                    let byte = data[self.current_piece_offset];
                    self.current_piece_offset += 1;
                    self.current_pos += 1;
                    return Some(byte);
                } else {
                    // Current piece exhausted; drop it and fetch the next.
                    self.current_piece_data = None;
                    self.current_piece_offset = 0;
                }
            }

            if let Some(piece_view) = self.piece_iter.next() {
                let buffer_id = piece_view.location.buffer_id();
                if let Some(buffer) = self.buffers.get(buffer_id) {
                    let piece_start_in_doc = piece_view.doc_offset;
                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                    // Clip the piece to [current_pos, end_pos).
                    let read_start = self.current_pos.max(piece_start_in_doc);
                    let read_end = self.end_pos.min(piece_end_in_doc);

                    if read_end > read_start {
                        let offset_in_piece = read_start - piece_start_in_doc;
                        let bytes_to_read = read_end - read_start;

                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
                        let buffer_end = buffer_start + bytes_to_read;

                        if let Some(data) = buffer.get_data() {
                            if buffer_end <= data.len() {
                                self.current_piece_data =
                                    Some(data[buffer_start..buffer_end].to_vec());
                                self.current_piece_offset = 0;
                                continue;
                            }
                        }
                    }
                }
            }

            // NOTE(review): reaching here ends the stream even when a
            // piece existed but its buffer was unloaded, out of bounds,
            // or yielded an empty clip — confirm that silently truncating
            // the search (rather than skipping to the next piece) is the
            // intended behavior for lazily-loaded buffers.
            return None;
        }
    }

    /// Fills `self.buffer` with the next chunk's bytes.
    ///
    /// The first chunk is filled to `chunk_size`; subsequent chunks keep
    /// the last `overlap` bytes of the previous chunk as a prefix and top
    /// up to `overlap + chunk_size`. Returns `false` once no new data can
    /// be produced.
    fn fill_next_chunk(&mut self) -> bool {
        if self.first_chunk {
            self.first_chunk = false;
            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }
            !self.buffer.is_empty()
        } else {
            if self.current_pos >= self.end_pos {
                return false;
            }

            // Keep only the overlap suffix; everything before it has been
            // fully reported and can be discarded.
            if self.buffer.len() > self.overlap {
                let drain_amount = self.buffer.len() - self.overlap;
                self.buffer.drain(0..drain_amount);
                self.buffer_absolute_pos += drain_amount;
            }

            let before_len = self.buffer.len();
            let target_len = self.overlap + self.chunk_size;
            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }

            // Progress is "did any new byte arrive past the overlap".
            self.buffer.len() > before_len
        }
    }
}
4150
impl<'a> Iterator for OverlappingChunks<'a> {
    type Item = ChunkInfo;

    fn next(&mut self) -> Option<Self::Item> {
        // Before any byte is consumed both positions still equal `start`,
        // which identifies the first chunk (its data has no overlap
        // prefix, so valid_start must be 0).
        let is_first = self.buffer_absolute_pos == self.current_pos;

        if !self.fill_next_chunk() {
            return None;
        }

        let valid_start = if is_first {
            0
        } else {
            // Clamp in case the final chunk is shorter than the overlap.
            self.overlap.min(self.buffer.len())
        };

        Some(ChunkInfo {
            // Cloned because `self.buffer` is reused for the next chunk.
            buffer: self.buffer.clone(),
            absolute_pos: self.buffer_absolute_pos,
            valid_start,
        })
    }
}
4177
4178#[cfg(test)]
4179mod tests {
4180 use crate::model::filesystem::StdFileSystem;
4181 use std::sync::Arc;
4182
    /// Shared test helper: the real `StdFileSystem` behind the trait
    /// object the buffer constructors expect.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }
4186 use super::*;
4187
    // An empty buffer still reports one (empty) line.
    #[test]
    fn test_empty_buffer() {
        let buffer = TextBuffer::empty(test_fs());
        assert_eq!(buffer.total_bytes(), 0);
        assert_eq!(buffer.line_count(), Some(1));
    }

    // Line starts, offset→line, and line/col→offset conversions on a
    // three-line document.
    #[test]
    fn test_line_positions_multiline() {
        let buffer = TextBuffer::from_bytes(b"Hello\nNew Line\nWorld!".to_vec(), test_fs());

        assert_eq!(buffer.line_count(), Some(3));

        // Line start offsets.
        assert_eq!(buffer.line_start_offset(0), Some(0));
        assert_eq!(buffer.line_start_offset(1), Some(6));
        assert_eq!(buffer.line_start_offset(2), Some(15));
        // Offsets map to the expected lines (boundaries included).
        assert_eq!(buffer.offset_to_position(0).unwrap().line, 0);
        assert_eq!(buffer.offset_to_position(5).unwrap().line, 0);
        assert_eq!(buffer.offset_to_position(6).unwrap().line, 1);
        assert_eq!(buffer.offset_to_position(14).unwrap().line, 1);
        assert_eq!(buffer.offset_to_position(15).unwrap().line, 2);
        // Line/column pairs map back to absolute offsets.
        assert_eq!(buffer.line_col_to_position(0, 5), 5);
        assert_eq!(buffer.line_col_to_position(1, 0), 6);
        assert_eq!(buffer.line_col_to_position(1, 8), 14);
        assert_eq!(buffer.line_col_to_position(2, 0), 15);
    }

    // Construction from raw bytes records size and line count.
    #[test]
    fn test_new_from_content() {
        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
        assert_eq!(buffer.total_bytes(), 11);
        assert_eq!(buffer.line_count(), Some(2));
    }

    // Full-content readback returns exactly what was loaded.
    #[test]
    fn test_get_all_text() {
        let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld");
    }
4233
    // Insertion before all existing content.
    #[test]
    fn test_insert_at_start() {
        let mut buffer = TextBuffer::from_bytes(b"world".to_vec(), test_fs());
        buffer.insert_bytes(0, b"hello ".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
        assert_eq!(buffer.total_bytes(), 11);
    }

    // Insertion that splits an existing piece.
    #[test]
    fn test_insert_in_middle() {
        let mut buffer = TextBuffer::from_bytes(b"helloworld".to_vec(), test_fs());
        buffer.insert_bytes(5, b" ".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
        assert_eq!(buffer.total_bytes(), 11);
    }

    // Appending at the very end of the buffer.
    #[test]
    fn test_insert_at_end() {
        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
        buffer.insert_bytes(5, b" world".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
        assert_eq!(buffer.total_bytes(), 11);
    }

    // Inserted newlines must be reflected in the line count.
    #[test]
    fn test_insert_with_newlines() {
        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
        buffer.insert_bytes(5, b"\nworld\ntest".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld\ntest");
        assert_eq!(buffer.line_count(), Some(3));
    }
4269
    // Deletion at the front of the buffer.
    #[test]
    fn test_delete_from_start() {
        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
        buffer.delete_bytes(0, 6);

        assert_eq!(buffer.get_all_text().unwrap(), b"world");
        assert_eq!(buffer.total_bytes(), 5);
    }

    // Deletion that splits a piece in the middle.
    #[test]
    fn test_delete_from_middle() {
        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
        buffer.delete_bytes(5, 1);

        assert_eq!(buffer.get_all_text().unwrap(), b"helloworld");
        assert_eq!(buffer.total_bytes(), 10);
    }

    // Deletion of the buffer's tail.
    #[test]
    fn test_delete_from_end() {
        let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
        buffer.delete_bytes(6, 5);

        assert_eq!(buffer.get_all_text().unwrap(), b"hello ");
        assert_eq!(buffer.total_bytes(), 6);
    }

    // Deleting both newlines collapses the document to one line.
    #[test]
    fn test_delete_with_newlines() {
        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
        // Removes "\nworld\n" (7 bytes starting at offset 5).
        buffer.delete_bytes(5, 7);
        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
        assert_eq!(buffer.line_count(), Some(1));
    }
4305
    // Offset → Position and Position → offset round trips.
    #[test]
    fn test_offset_position_conversions() {
        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());

        let pos = buffer.offset_to_position(0);
        assert_eq!(pos, Some(Position { line: 0, column: 0 }));

        let pos = buffer.offset_to_position(6);
        assert_eq!(pos, Some(Position { line: 1, column: 0 }));

        let offset = buffer.position_to_offset(Position { line: 1, column: 0 });
        assert_eq!(offset, 6);
    }

    // Position-addressed insertion.
    #[test]
    fn test_insert_at_position() {
        let mut buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
        buffer.insert_at_position(Position { line: 1, column: 0 }, b"beautiful ".to_vec());

        assert_eq!(buffer.get_all_text().unwrap(), b"hello\nbeautiful world");
    }

    // Position-addressed deletion spanning multiple lines.
    #[test]
    fn test_delete_range() {
        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());

        let start = Position { line: 0, column: 5 };
        let end = Position { line: 2, column: 0 };
        buffer.delete_range(start, end);

        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
    }

    // Lines keep their trailing newline; the final line has none;
    // past-the-end lines yield None.
    #[test]
    fn test_get_line() {
        let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());

        assert_eq!(buffer.get_line(0), Some(b"hello\n".to_vec()));
        assert_eq!(buffer.get_line(1), Some(b"world\n".to_vec()));
        assert_eq!(buffer.get_line(2), Some(b"test".to_vec()));
        assert_eq!(buffer.get_line(3), None);
    }
4348
    // Interleaved inserts and deletes keep text and line count consistent.
    #[test]
    fn test_multiple_operations() {
        let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());

        buffer.insert_bytes(0, b"start\n".to_vec());
        assert_eq!(buffer.line_count(), Some(4));

        // Removes "line1\n".
        buffer.delete_bytes(6, 6);
        assert_eq!(buffer.line_count(), Some(3));

        buffer.insert_bytes(6, b"new\n".to_vec());
        assert_eq!(buffer.line_count(), Some(4));

        let text = buffer.get_all_text().unwrap();
        assert_eq!(text, b"start\nnew\nline2\nline3");
    }

    // Arbitrary sub-range reads.
    #[test]
    fn test_get_text_range() {
        let buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());

        assert_eq!(buffer.get_text_range(0, 5), Some(b"hello".to_vec()));
        assert_eq!(buffer.get_text_range(6, 5), Some(b"world".to_vec()));
        assert_eq!(buffer.get_text_range(0, 11), Some(b"hello world".to_vec()));
    }

    // Zero-length insert/delete are no-ops.
    #[test]
    fn test_empty_operations() {
        let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());

        buffer.insert_bytes(2, Vec::new());
        assert_eq!(buffer.get_all_text().unwrap(), b"hello");

        buffer.delete_bytes(2, 0);
        assert_eq!(buffer.get_all_text().unwrap(), b"hello");
    }

    // Regression: repeated inserts at offset 0 after emptying the buffer.
    #[test]
    fn test_sequential_inserts_at_beginning() {
        let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());

        buffer.delete_bytes(0, 12);
        assert_eq!(buffer.get_all_text().unwrap(), b"");

        buffer.insert_bytes(0, vec![b'a']);
        assert_eq!(buffer.get_all_text().unwrap(), b"a");

        buffer.insert_bytes(0, vec![b'b']);
        assert_eq!(buffer.get_all_text().unwrap(), b"ba");
    }
4403
4404 mod large_file_support {
4407 use super::*;
4408 use crate::model::piece_tree::StringBuffer;
4409 use std::fs::File;
4410 use std::io::Write;
4411 use tempfile::TempDir;
4412
        // Eagerly-loaded string buffers can report their newline count.
        #[test]
        fn test_line_feed_count_is_some_for_loaded_buffer() {
            let buffer = StringBuffer::new(0, b"hello\nworld\ntest".to_vec());
            assert_eq!(buffer.line_feed_count(), Some(2));
        }

        // Unloaded (lazy) buffers cannot know their newline count.
        #[test]
        fn test_line_feed_count_is_none_for_unloaded_buffer() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            let buffer = StringBuffer::new_unloaded(0, file_path, 0, 100);
            assert_eq!(buffer.line_feed_count(), None);
        }

        // Small in-memory buffers have an exact line count.
        #[test]
        fn test_line_count_is_some_for_small_buffer() {
            let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
            assert_eq!(buffer.line_count(), Some(3));
        }

        // A piece tree built without line data reports None for its
        // line count instead of panicking.
        #[test]
        fn test_piece_tree_works_with_none_line_count() {
            let buffer = StringBuffer::new_loaded(0, b"hello\nworld".to_vec(), false);
            assert_eq!(buffer.line_feed_count(), None);

            use crate::model::piece_tree::{BufferLocation, PieceTree};
            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, None);

            assert_eq!(tree.line_count(), None);
        }
4449
        // Loaded variant exposes its data and (optionally) line starts.
        #[test]
        fn test_buffer_data_loaded_variant() {
            let data = b"hello world".to_vec();
            let buffer = StringBuffer::new_loaded(0, data.clone(), true);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert!(buffer.get_line_starts().is_some());
        }

        // Loaded without line-start indexing: data yes, line starts no.
        #[test]
        fn test_buffer_data_loaded_without_line_starts() {
            let data = b"hello\nworld".to_vec();
            let buffer = StringBuffer::new_loaded(0, data.clone(), false);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert_eq!(buffer.get_line_starts(), None);
        }

        // Unloaded variant exposes neither data nor line starts.
        #[test]
        fn test_buffer_data_unloaded_variant() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            let buffer = StringBuffer::new_unloaded(0, file_path.clone(), 0, 100);

            assert!(!buffer.is_loaded());
            assert_eq!(buffer.get_data(), None);
            assert_eq!(buffer.get_line_starts(), None);
        }
4483
#[test]
fn test_buffer_load_method() {
    // `load` turns an unloaded buffer into a loaded one by reading its
    // byte range from the backing file.
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("test.txt");

    let payload = b"hello world";
    File::create(&path).unwrap().write_all(payload).unwrap();

    let mut buffer = StringBuffer::new_unloaded(0, path, 0, payload.len());
    assert!(!buffer.is_loaded());

    buffer.load(&crate::model::filesystem::StdFileSystem).unwrap();

    assert!(buffer.is_loaded());
    assert_eq!(buffer.get_data(), Some(&payload[..]));
}
4508
#[test]
fn test_string_buffer_new_vs_new_loaded() {
    let data = b"hello\nworld".to_vec();

    // `new` always computes line starts eagerly.
    let eager = StringBuffer::new(0, data.clone());
    assert!(eager.is_loaded());
    assert!(eager.get_line_starts().is_some());
    assert_eq!(eager.line_feed_count(), Some(1));

    // `new_loaded(.., false)` keeps the bytes but skips line indexing.
    let unindexed = StringBuffer::new_loaded(0, data, false);
    assert!(unindexed.is_loaded());
    assert_eq!(unindexed.get_line_starts(), None);
    assert_eq!(unindexed.line_feed_count(), None);
}
4525
#[test]
fn test_load_small_file_eager_loading() {
    // A file below the large-file threshold is loaded fully into memory:
    // line metadata is available and the backing buffer is loaded.
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("small.txt");

    let contents = b"hello\ntest";
    File::create(&path).unwrap().write_all(contents).unwrap();

    let buffer = TextBuffer::load_from_file(&path, 0, test_fs()).unwrap();

    assert!(!buffer.large_file);
    assert_eq!(buffer.total_bytes(), contents.len());
    assert_eq!(buffer.line_count(), Some(2));
    assert_eq!(buffer.get_all_text().unwrap(), contents);
    assert!(buffer.buffers[0].is_loaded());
}
4552
#[test]
fn test_load_large_file_lazy_loading() {
    // With a threshold (10) smaller than the file (16 bytes), loading must
    // switch to large-file mode: bytes stay on disk, line count is unknown.
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("large.txt");

    let test_data = b"hello\nworld\ntest";
    File::create(&file_path)
        .unwrap()
        .write_all(test_data)
        .unwrap();

    let buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

    assert!(buffer.large_file);
    assert_eq!(buffer.total_bytes(), test_data.len());

    // Line count is unknown until the content is actually scanned.
    assert_eq!(buffer.line_count(), None);

    // The backing buffer must remain unloaded — no eager read happened.
    assert!(!buffer.buffers[0].is_loaded());
    assert_eq!(buffer.buffers[0].get_data(), None);
}
4579
#[test]
fn test_issue_657_search_on_large_file_unloaded_buffer() {
    // Regression test for issue 657: searching needs file content, but on a
    // large file the backing buffer starts unloaded. `to_string()` must
    // honestly return None, while `get_text_range_mut()` lazily loads the
    // buffer and makes the content (and thus search) available.
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("large_search_test.txt");

    let test_data = b"line1\nline2\nSEARCH_TARGET\nline4\nline5";
    File::create(&file_path)
        .unwrap()
        .write_all(test_data)
        .unwrap();

    // Threshold (10) below the file size forces large-file mode.
    let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

    assert!(buffer.large_file, "Buffer should be in large file mode");
    assert!(
        !buffer.buffers[0].is_loaded(),
        "Buffer should be unloaded initially"
    );

    assert!(
        buffer.to_string().is_none(),
        "BUG REPRODUCED: to_string() returns None for unloaded buffer"
    );

    // The mutable range accessor loads backing buffers on demand.
    let total_bytes = buffer.len();
    let content = buffer.get_text_range_mut(0, total_bytes).unwrap();
    let content_str = String::from_utf8_lossy(&content);

    assert!(
        content_str.contains("SEARCH_TARGET"),
        "FIX WORKS: get_text_range_mut() loaded the buffer and found the search target"
    );

    // Once loaded, the full text is representable again.
    assert!(
        buffer.to_string().is_some(),
        "After get_text_range_mut(), to_string() should work"
    );
}
4633
#[test]
fn test_large_file_threshold_boundary() {
    // The large-file check is inclusive at the boundary: size == threshold
    // is treated as large, size < threshold is loaded eagerly.
    let temp_dir = TempDir::new().unwrap();

    // Exactly at the 100-byte threshold → large-file mode.
    let file_path = temp_dir.path().join("at_threshold.txt");
    let test_data = vec![b'x'; 100];
    File::create(&file_path)
        .unwrap()
        .write_all(&test_data)
        .unwrap();

    let buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
    assert!(buffer.large_file);

    // One byte below the threshold → normal eager loading.
    let file_path2 = temp_dir.path().join("below_threshold.txt");
    let test_data2 = vec![b'x'; 99];
    File::create(&file_path2)
        .unwrap()
        .write_all(&test_data2)
        .unwrap();

    let buffer2 = TextBuffer::load_from_file(&file_path2, 100, test_fs()).unwrap();
    assert!(!buffer2.large_file);
}
4662
#[test]
fn test_large_file_default_threshold() {
    // Passing 0 as the threshold still loads a tiny file eagerly
    // (0 appears to select the default threshold — see
    // DEFAULT_LARGE_FILE_THRESHOLD; confirm against load_from_file).
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("test.txt");

    File::create(&path).unwrap().write_all(b"hello").unwrap();

    let buffer = TextBuffer::load_from_file(&path, 0, test_fs()).unwrap();

    assert!(!buffer.large_file);
}
4680
#[test]
fn test_large_file_has_correct_piece_tree_structure() {
    // A lazily loaded file is represented by exactly one backing buffer
    // (still unloaded) whose length matches the on-disk size.
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("large.txt");

    let test_data = b"hello world";
    File::create(&file_path)
        .unwrap()
        .write_all(test_data)
        .unwrap();

    // Threshold 5 < 11 bytes → large-file mode.
    let buffer = TextBuffer::load_from_file(&file_path, 5, test_fs()).unwrap();

    assert_eq!(buffer.total_bytes(), test_data.len());

    // One buffer, not yet read from disk.
    assert_eq!(buffer.buffers.len(), 1);

    assert!(!buffer.buffers[0].is_loaded());
}
4704
#[test]
fn test_empty_large_file() {
    // An empty file loads as an empty buffer.
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("empty.txt");
    File::create(&path).unwrap();

    let buffer = TextBuffer::load_from_file(&path, 0, test_fs()).unwrap();

    assert_eq!(buffer.total_bytes(), 0);
    assert!(buffer.is_empty());
}
4720
#[test]
fn test_large_file_basic_api_operations() {
    // End-to-end smoke test: every basic TextBuffer operation must work on
    // a buffer in large-file (lazy) mode.
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("large_test.txt");

    let test_data = b"line1\nline2\nline3\nline4\n";
    File::create(&file_path)
        .unwrap()
        .write_all(test_data)
        .unwrap();

    // Threshold 10 < 24 bytes → large-file mode.
    let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

    assert!(buffer.large_file);
    assert_eq!(buffer.line_count(), None);
    assert_eq!(buffer.total_bytes(), test_data.len());
    assert!(!buffer.is_empty());
    assert_eq!(buffer.len(), test_data.len());

    // Range reads lazily load the needed bytes.
    let range_result = buffer.get_text_range_mut(0, 5).unwrap();
    assert_eq!(range_result, b"line1");

    let range_result2 = buffer.get_text_range_mut(6, 5).unwrap();
    assert_eq!(range_result2, b"line2");

    // After the reads above loaded the content, get_all_text() succeeds.
    let all_text = buffer.get_all_text().unwrap();
    assert_eq!(all_text, test_data);

    assert_eq!(buffer.slice_bytes(0..5), b"line1");

    // Insert at the start ("prefix_" = 7 bytes) and verify bookkeeping.
    buffer.insert_bytes(0, b"prefix_".to_vec());
    assert_eq!(buffer.total_bytes(), test_data.len() + 7);
    assert!(buffer.is_modified());

    let text_after_insert = buffer.get_all_text().unwrap();
    assert_eq!(&text_after_insert[0..7], b"prefix_");
    assert_eq!(&text_after_insert[7..12], b"line1");

    // Deleting the prefix restores the original content.
    buffer.delete_bytes(0, 7);
    assert_eq!(buffer.total_bytes(), test_data.len());

    let text_after_delete = buffer.get_all_text().unwrap();
    assert_eq!(text_after_delete, test_data);

    // Append at the very end ("suffix" = 6 bytes).
    let end_offset = buffer.total_bytes();
    buffer.insert_bytes(end_offset, b"suffix".to_vec());
    assert_eq!(buffer.total_bytes(), test_data.len() + 6);

    let final_text = buffer.get_all_text().unwrap();
    assert!(final_text.ends_with(b"suffix"));
    assert_eq!(&final_text[0..test_data.len()], test_data);

    // Position conversions work even in large-file mode.
    let pos = buffer.offset_to_position(0).unwrap();
    assert_eq!(pos.column, 0);

    let offset = buffer.position_to_offset(Position { line: 0, column: 0 });
    assert_eq!(offset, 0);

    // replace_range returns a success flag.
    let replace_result = buffer.replace_range(0..5, "START");
    assert!(replace_result);

    let text_after_replace = buffer.get_all_text().unwrap();
    assert!(text_after_replace.starts_with(b"START"));
}
4805
#[test]
fn test_large_file_chunk_based_loading() {
    // Exercises lazy, chunk-granular loading: a 3 * LOAD_CHUNK_SIZE file of
    // 'A', 'B' then 'C' chunks is read piecemeal and must round-trip exactly.
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("huge.txt");

    let chunk_size = LOAD_CHUNK_SIZE;
    let file_size = chunk_size * 3;
    let mut file = File::create(&file_path).unwrap();
    file.write_all(&vec![b'A'; chunk_size]).unwrap();
    file.write_all(&vec![b'B'; chunk_size]).unwrap();
    file.write_all(&vec![b'C'; chunk_size]).unwrap();
    file.flush().unwrap();

    // Threshold of 1 byte forces large-file (lazy) mode.
    let mut buffer = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

    assert!(buffer.large_file);
    assert_eq!(buffer.total_bytes(), file_size);

    // Nothing is read from disk until a range is requested.
    assert!(!buffer.buffers[0].is_loaded());

    // Reads inside each chunk return that chunk's fill byte.
    let first_chunk_data = buffer.get_text_range_mut(0, 1024).unwrap();
    assert_eq!(first_chunk_data.len(), 1024);
    assert!(first_chunk_data.iter().all(|&b| b == b'A'));

    let second_chunk_data = buffer.get_text_range_mut(chunk_size, 1024).unwrap();
    assert_eq!(second_chunk_data.len(), 1024);
    assert!(second_chunk_data.iter().all(|&b| b == b'B'));

    let third_chunk_data = buffer.get_text_range_mut(chunk_size * 2, 1024).unwrap();
    assert_eq!(third_chunk_data.len(), 1024);
    assert!(third_chunk_data.iter().all(|&b| b == b'C'));

    // A read straddling the A/B chunk boundary must stitch both chunks.
    let cross_chunk_offset = chunk_size - 512;
    let cross_chunk_data = buffer.get_text_range_mut(cross_chunk_offset, 1024).unwrap();
    assert_eq!(cross_chunk_data.len(), 1024);
    assert!(cross_chunk_data[..512].iter().all(|&b| b == b'A'));
    assert!(cross_chunk_data[512..].iter().all(|&b| b == b'B'));

    // Chunked loading should have split the single placeholder buffer
    // into several backing buffers.
    assert!(
        buffer.buffers.len() > 1,
        "Expected multiple buffers after chunk-based loading, got {}",
        buffer.buffers.len()
    );

    // Edits still work on the partially loaded buffer ("PREFIX" = 6 bytes).
    buffer.insert_bytes(0, b"PREFIX".to_vec());
    assert_eq!(buffer.total_bytes(), file_size + 6);

    let after_insert = buffer.get_text_range_mut(0, 6).unwrap();
    assert_eq!(after_insert, b"PREFIX");

    let after_prefix = buffer.get_text_range_mut(6, 10).unwrap();
    assert!(after_prefix.iter().all(|&b| b == b'A'));

    // Fresh buffer: scan the whole file in 64 KiB reads and verify every
    // byte matches the fill character expected at its offset.
    let mut buffer2 = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

    let chunk_read_size = 64 * 1024;
    let mut offset = 0;
    while offset < file_size {
        let bytes_to_read = chunk_read_size.min(file_size - offset);
        let chunk_data = buffer2.get_text_range_mut(offset, bytes_to_read).unwrap();

        let first_mb_end = chunk_size;
        let second_mb_end = chunk_size * 2;

        for (i, &byte) in chunk_data.iter().enumerate() {
            let file_offset = offset + i;
            let expected = if file_offset < first_mb_end {
                b'A'
            } else if file_offset < second_mb_end {
                b'B'
            } else {
                b'C'
            };
            assert_eq!(
                byte, expected,
                "Mismatch at file offset {}: expected {}, got {}",
                file_offset, expected as char, byte as char
            );
        }

        offset += bytes_to_read;
    }
}
4911
#[test]
fn test_large_file_incremental_save() {
    // Saving a large-file buffer must preserve unloaded regions: only the
    // first chunk is ever touched, yet the whole file plus the edit must
    // appear in the saved output.
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("large_save_test.txt");

    let chunk_size = 1000;
    let file_size = chunk_size * 2;
    let mut file = File::create(&file_path).unwrap();
    file.write_all(&vec![b'A'; chunk_size]).unwrap();
    file.write_all(&vec![b'B'; chunk_size]).unwrap();
    file.flush().unwrap();

    // Threshold 100 < 2000 bytes → large-file mode.
    let mut buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
    assert!(buffer.large_file);
    assert_eq!(buffer.total_bytes(), file_size);

    // Touch only the start of the file (loads just that region).
    let first_bytes = buffer.get_text_range_mut(0, 50).unwrap();
    assert!(first_bytes.iter().all(|&b| b == b'A'));

    buffer.insert_bytes(0, b"PREFIX_".to_vec());

    let save_path = temp_dir.path().join("saved.txt");
    buffer.save_to_file(&save_path).unwrap();

    let saved_content = std::fs::read(&save_path).unwrap();

    // "PREFIX_" is 7 bytes.
    assert_eq!(
        saved_content.len(),
        file_size + 7,
        "Saved file should be {} bytes, got {}",
        file_size + 7,
        saved_content.len()
    );

    assert_eq!(&saved_content[..7], b"PREFIX_", "Should start with PREFIX_");

    assert!(
        saved_content[7..100].iter().all(|&b| b == b'A'),
        "First chunk after prefix should be A's"
    );

    // The 'B' region was never loaded; saving must still carry it over.
    let second_chunk_start = 7 + chunk_size;
    assert!(
        saved_content[second_chunk_start..second_chunk_start + 100]
            .iter()
            .all(|&b| b == b'B'),
        "Second chunk should be B's (was unloaded, should be preserved)"
    );
}
4977
#[test]
fn test_large_file_save_with_multiple_edits() {
    // Multiple edits at different offsets of a lazily loaded file must all
    // appear in the saved output, with the untouched regions preserved.
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("multi_edit.txt");

    // 100 numbered lines — comfortably above the 500-byte threshold below.
    let mut content = Vec::new();
    for i in 0..100 {
        content.extend_from_slice(
            format!("Line {:04}: padding to make it longer\n", i).as_bytes(),
        );
    }
    let original_len = content.len();
    std::fs::write(&file_path, &content).unwrap();

    let mut buffer = TextBuffer::load_from_file(&file_path, 500, test_fs()).unwrap();
    assert!(
        buffer.line_count().is_none(),
        "Should be in large file mode"
    );

    buffer.insert_bytes(0, b"[START]".to_vec());

    // Force-load around the middle before editing there. The "+ 7" accounts
    // for the "[START]" prefix already inserted at offset 0.
    let mid_offset = original_len / 2;
    let _mid_bytes = buffer.get_text_range_mut(mid_offset + 7, 10).unwrap();
    buffer.insert_bytes(mid_offset + 7, b"[MIDDLE]".to_vec());

    let save_path = temp_dir.path().join("multi_edit_saved.txt");
    buffer.save_to_file(&save_path).unwrap();

    let saved = std::fs::read_to_string(&save_path).unwrap();

    assert!(
        saved.starts_with("[START]Line 0000"),
        "Should start with our edit"
    );
    assert!(saved.contains("[MIDDLE]"), "Should contain middle edit");
    assert!(saved.contains("Line 0099"), "Should preserve end of file");

    // "[START]" = 7 bytes, "[MIDDLE]" = 8 bytes.
    let expected_len = original_len + 7 + 8;
    assert_eq!(
        saved.len(),
        expected_len,
        "Length should be original + edits"
    );
}
5031 }
5032
#[test]
fn test_offset_to_position_simple() {
    // "a\nb\nc\nd": lines of one byte each, separated by newlines.
    // A newline byte belongs to the line it terminates.
    let buffer = TextBuffer::from_bytes(b"a\nb\nc\nd".to_vec(), test_fs());
    let pos_at = |offset: usize| {
        buffer
            .offset_to_position(offset)
            .expect("small buffer should have line metadata")
    };

    let pos = pos_at(0);
    assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
    assert_eq!(pos.column, 0);

    let pos = pos_at(1);
    assert_eq!(pos.line, 0, "Byte 1 (newline) should be on line 0");
    assert_eq!(pos.column, 1);

    let pos = pos_at(2);
    assert_eq!(pos.line, 1, "Byte 2 should be on line 1");
    assert_eq!(pos.column, 0);

    let pos = pos_at(3);
    assert_eq!(pos.line, 1, "Byte 3 (newline) should be on line 1");
    assert_eq!(pos.column, 1);

    let pos = pos_at(4);
    assert_eq!(pos.line, 2, "Byte 4 should be on line 2");
    assert_eq!(pos.column, 0);

    let pos = pos_at(6);
    assert_eq!(pos.line, 3, "Byte 6 should be on line 3");
    assert_eq!(pos.column, 0);
}
5083
#[test]
fn test_offset_to_position_after_insert() {
    // Inserting "x\n" at the start of line 1 shifts subsequent content down
    // one line; offset→position must reflect the new layout ("a\nx\nb\n").
    let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());

    buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());

    let pos = buffer
        .offset_to_position(0)
        .expect("small buffer should have line metadata");
    assert_eq!(pos.line, 0, "Byte 0 should still be on line 0");

    let pos = buffer
        .offset_to_position(2)
        .expect("small buffer should have line metadata");
    assert_eq!(
        pos.line, 1,
        "Byte 2 (start of inserted line) should be on line 1"
    );

    let pos = buffer
        .offset_to_position(4)
        .expect("small buffer should have line metadata");
    assert_eq!(
        pos.line, 2,
        "Byte 4 (start of 'b') should be on line 2 after insert"
    );
}
5118
#[test]
fn test_offset_to_position_empty_lines() {
    // "\n\n\n" yields four logical lines: three empty terminated lines plus
    // the empty line after the final newline (where EOF sits).
    let buffer = TextBuffer::from_bytes(b"\n\n\n".to_vec(), test_fs());
    let pos_at = |offset: usize| {
        buffer
            .offset_to_position(offset)
            .expect("small buffer should have line metadata")
    };

    assert_eq!(pos_at(0).line, 0, "Byte 0 should be on line 0");
    assert_eq!(pos_at(1).line, 1, "Byte 1 should be on line 1");
    assert_eq!(pos_at(2).line, 2, "Byte 2 should be on line 2");
    assert_eq!(pos_at(3).line, 3, "Byte 3 (EOF) should be on line 3");
}
5149
#[test]
fn test_offset_to_position_long_lines() {
    // Two 11-byte lines ("aaaaaaaaaa\n", "bbbbbbbbbb\n") followed by a
    // 10-byte unterminated final line ("cccccccccc").
    let content = [&b"aaaaaaaaaa\n"[..], b"bbbbbbbbbb\n", b"cccccccccc"].concat();
    let buffer = TextBuffer::from_bytes(content, test_fs());
    let pos_at = |offset: usize| {
        buffer
            .offset_to_position(offset)
            .expect("small buffer should have line metadata")
    };

    // Line starts: offsets 0, 11, 22.
    let pos = pos_at(0);
    assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
    assert_eq!(pos.column, 0);

    let pos = pos_at(11);
    assert_eq!(pos.line, 1, "Byte 11 (start of line 1) should be on line 1");
    assert_eq!(pos.column, 0);

    let pos = pos_at(22);
    assert_eq!(pos.line, 2, "Byte 22 (start of line 2) should be on line 2");
    assert_eq!(pos.column, 0);

    // Mid-line offsets map to column 5 of their line.
    let pos = pos_at(5);
    assert_eq!(pos.line, 0, "Byte 5 should be on line 0");
    assert_eq!(pos.column, 5);

    let pos = pos_at(16);
    assert_eq!(pos.line, 1, "Byte 16 should be on line 1");
    assert_eq!(pos.column, 5);
}
5192
#[test]
fn test_line_iterator_with_offset_to_position() {
    // For every byte offset (including EOF), a line iterator created there
    // must report the offset of the first byte of the containing line,
    // matching what the offset→position→offset round trip computes.
    let mut buffer = TextBuffer::from_bytes(b"line0\nline1\nline2\n".to_vec(), test_fs());

    for byte_pos in 0..=buffer.len() {
        // NOTE(review): the second argument (80) looks like a display/wrap
        // width — confirm against line_iterator's signature.
        let iter = buffer.line_iterator(byte_pos, 80);
        let iter_pos = iter.current_position();
        // Independently compute the start offset of the containing line.
        let expected_line = buffer
            .offset_to_position(byte_pos)
            .expect("small buffer should have line metadata")
            .line;
        let expected_line_start = buffer.position_to_offset(Position {
            line: expected_line,
            column: 0,
        });

        assert_eq!(
            iter_pos, expected_line_start,
            "LineIterator at byte {} should position at line start {} but got {}",
            byte_pos, expected_line_start, iter_pos
        );
    }
}
5218
#[test]
fn test_piece_tree_line_count_after_insert() {
    // After an insert, the tree's cached line count must agree with a
    // brute-force newline recount over the raw bytes.
    let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());

    buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());

    let bytes = buffer.slice_bytes(0..buffer.len());
    let newlines = bytes.iter().filter(|&&b| b == b'\n').count();

    assert_eq!(
        buffer.line_count(),
        Some(newlines + 1),
        "Line count mismatch after insert"
    );
}
5239
#[test]
fn test_position_to_lsp_position_after_modification() {
    // Byte-offset → LSP (line, character) conversion must stay correct
    // after edits. Initial layout:
    //   line 0: "fn foo(val: i32) {\n"  → 19 bytes, offsets 0..=18
    //   line 1: "    val + 1\n"         → 'v' of "val" is at offset 23
    //   line 2: "}\n"
    let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
    let mut buffer = TextBuffer::from_bytes(initial.to_vec(), test_fs());

    // Offset 23 = line 1, character 4 (the 'v' of "val").
    let (line, char) = buffer.position_to_lsp_position(23);
    assert_eq!(line, 1, "Initial: position 23 should be on line 1");
    assert_eq!(char, 4, "Initial: position 23 should be at char 4");

    // Rename both occurrences of "val" to "value": for each, delete the old
    // identifier by position, then insert the replacement at the byte
    // offset where the identifier started.
    buffer.delete_range(
        Position { line: 1, column: 4 },
        Position { line: 1, column: 7 },
    );
    buffer.insert_bytes(23, b"value".to_vec());
    buffer.delete_range(
        Position { line: 0, column: 7 },
        Position {
            line: 0,
            column: 10,
        },
    );
    buffer.insert_bytes(7, b"value".to_vec());
    let content = String::from_utf8_lossy(&buffer.get_all_text().unwrap()).to_string();
    assert_eq!(content, "fn foo(value: i32) {\n    value + 1\n}\n");

    // Line 0 is now 21 bytes, so line 1 starts at offset 21 and offset 25
    // is again character 4 (the 'v' of "value").
    let (line, char) = buffer.position_to_lsp_position(25);
    assert_eq!(
        line, 1,
        "After modification: position 25 should be on line 1"
    );
    assert_eq!(
        char, 4,
        "After modification: position 25 should be at char 4"
    );

    let (line, char) = buffer.position_to_lsp_position(21);
    assert_eq!(line, 1, "Position 21 should be on line 1");
    assert_eq!(char, 0, "Position 21 should be at char 0 (start of line)");
}
5301
#[test]
fn test_detect_crlf() {
    // CR-LF pairs are detected as the CRLF line-ending style.
    let sample = b"hello\r\nworld\r\n";
    assert_eq!(TextBuffer::detect_line_ending(sample), LineEnding::CRLF);
}
5309
#[test]
fn test_detect_lf() {
    // Bare line feeds are detected as the LF line-ending style.
    let sample = b"hello\nworld\n";
    assert_eq!(TextBuffer::detect_line_ending(sample), LineEnding::LF);
}
5317
#[test]
fn test_normalize_crlf() {
    // Normalization rewrites every CRLF pair to a single LF.
    let normalized = TextBuffer::normalize_line_endings(b"hello\r\nworld\r\n".to_vec());
    assert_eq!(normalized, b"hello\nworld\n");
}
5324
#[test]
fn test_normalize_empty() {
    // Normalizing zero bytes yields zero bytes.
    let normalized = TextBuffer::normalize_line_endings(Vec::new());
    assert_eq!(normalized, b"".to_vec());
}
5331
#[test]
fn test_get_all_text_returns_empty_for_unloaded_buffers() {
    // `get_all_text()` must return None while unloaded regions exist, so
    // callers cannot mistake partial content for the whole file; the
    // mutable lazy-loading accessor is the way to obtain everything.
    use tempfile::TempDir;
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("large_test.txt");

    let original_content = "X".repeat(50_000);
    std::fs::write(&file_path, &original_content).unwrap();

    // 1 KiB threshold << 50 KB file → large-file (lazy) mode.
    let mut buffer = TextBuffer::load_from_file(&file_path, 1024, test_fs()).unwrap();
    assert!(buffer.large_file, "Should be in large file mode");
    assert!(!buffer.buffers[0].is_loaded(), "Buffer should be unloaded");

    // An edit does not force the rest of the file to load.
    buffer.insert_bytes(0, b"EDITED: ".to_vec());

    let content_immutable = buffer.get_all_text();

    assert!(
        content_immutable.is_none(),
        "get_all_text() should return None for large files with unloaded regions. \
        Got Some({} bytes) instead of None.",
        content_immutable.as_ref().map(|c| c.len()).unwrap_or(0)
    );

    // get_text_range_mut() loads on demand and returns everything,
    // including the 8-byte "EDITED: " prefix.
    let total = buffer.total_bytes();
    let content_lazy = buffer.get_text_range_mut(0, total).unwrap();
    assert_eq!(
        content_lazy.len(),
        50_000 + 8,
        "get_text_range_mut() should return all content with lazy loading"
    );
    assert!(
        String::from_utf8_lossy(&content_lazy).starts_with("EDITED: "),
        "Content should start with our edit"
    );
}
5382
/// Tests for line-ending detection/conversion and for the
/// `SudoSaveRequired` fallback taken when the save destination is not
/// writable by the current user.
mod line_ending_conversion {
    use super::*;

    #[test]
    fn test_convert_lf_to_crlf() {
        let input = b"Line 1\nLine 2\nLine 3\n";
        let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
        assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
    }

    #[test]
    fn test_convert_crlf_to_lf() {
        let input = b"Line 1\r\nLine 2\r\nLine 3\r\n";
        let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
        assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
    }

    #[test]
    fn test_convert_cr_to_lf() {
        // Bare carriage returns (classic Mac style) are converted too.
        let input = b"Line 1\rLine 2\rLine 3\r";
        let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
        assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
    }

    #[test]
    fn test_convert_mixed_to_crlf() {
        // Content mixing LF, CRLF and CR becomes uniformly CRLF.
        let input = b"Line 1\nLine 2\r\nLine 3\r";
        let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
        assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
    }

    #[test]
    fn test_convert_lf_to_lf_is_noop() {
        // Converting to the style the content already uses changes nothing.
        let input = b"Line 1\nLine 2\nLine 3\n";
        let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
        assert_eq!(result, input.to_vec());
    }

    #[test]
    fn test_convert_empty_content() {
        let input = b"";
        let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
        assert_eq!(result, b"".to_vec());
    }

    #[test]
    fn test_convert_no_line_endings() {
        let input = b"No line endings here";
        let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
        assert_eq!(result, b"No line endings here".to_vec());
    }

    #[test]
    fn test_set_line_ending_marks_modified() {
        // Changing the line-ending style is a user-visible edit, so the
        // modified flag must be raised.
        let mut buffer = TextBuffer::from_bytes(b"Hello\nWorld\n".to_vec(), test_fs());
        assert!(!buffer.is_modified());

        buffer.set_line_ending(LineEnding::CRLF);
        assert!(buffer.is_modified());
    }

    #[test]
    fn test_set_default_line_ending_does_not_mark_modified() {
        // Setting the default style is configuration, not an edit.
        let mut buffer = TextBuffer::empty(test_fs());
        assert!(!buffer.is_modified());

        buffer.set_default_line_ending(LineEnding::CRLF);
        assert!(!buffer.is_modified());
        assert_eq!(buffer.line_ending(), LineEnding::CRLF);
    }

    #[test]
    fn test_save_to_file_converts_lf_to_crlf() {
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test_lf_to_crlf.txt");

        let original_content = b"Line 1\nLine 2\nLine 3\n";
        std::fs::write(&file_path, original_content).unwrap();

        // Default threshold keeps the small file fully in memory.
        let mut buffer =
            TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
                .unwrap();
        assert_eq!(buffer.line_ending(), LineEnding::LF);

        buffer.set_line_ending(LineEnding::CRLF);
        assert_eq!(buffer.line_ending(), LineEnding::CRLF);
        assert!(buffer.is_modified());

        buffer.save_to_file(&file_path).unwrap();

        // Saving must rewrite every newline in the configured style.
        let saved_bytes = std::fs::read(&file_path).unwrap();
        assert_eq!(&saved_bytes, b"Line 1\r\nLine 2\r\nLine 3\r\n");
    }

    #[test]
    fn test_save_to_file_converts_crlf_to_lf() {
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test_crlf_to_lf.txt");

        let original_content = b"Line 1\r\nLine 2\r\nLine 3\r\n";
        std::fs::write(&file_path, original_content).unwrap();

        let mut buffer =
            TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
                .unwrap();
        assert_eq!(buffer.line_ending(), LineEnding::CRLF);

        buffer.set_line_ending(LineEnding::LF);
        assert_eq!(buffer.line_ending(), LineEnding::LF);
        assert!(buffer.is_modified());

        buffer.save_to_file(&file_path).unwrap();

        let saved_bytes = std::fs::read(&file_path).unwrap();
        assert_eq!(&saved_bytes, b"Line 1\nLine 2\nLine 3\n");
    }

    #[test]
    #[cfg(unix)]
    fn test_save_to_unwritable_file() -> anyhow::Result<()> {
        // Root ignores permission bits, so the scenario cannot be set up.
        if unsafe { libc::getuid() } == 0 {
            eprintln!("Skipping test: root bypasses file permission checks");
            return Ok(());
        }
        use std::fs::Permissions;
        use std::os::unix::fs::PermissionsExt;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let unwritable_dir = temp_dir.path().join("unwritable_dir");
        std::fs::create_dir(&unwritable_dir)?;

        let file_path = unwritable_dir.join("unwritable.txt");
        std::fs::write(&file_path, "original content")?;

        // r-x only: entries inside the directory cannot be replaced.
        std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;

        let mut buffer = TextBuffer::from_bytes(b"new content".to_vec(), test_fs());
        let result = buffer.save_to_file(&file_path);

        // The save must fail with SudoSaveRequired, leaving the content in
        // a temp file that a privileged copy can pick up later.
        match result {
            Err(e) => {
                if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
                    assert_eq!(sudo_err.dest_path, file_path);
                    assert!(sudo_err.temp_path.exists());
                    // Best-effort cleanup; ignore removal failure.
                    drop(std::fs::remove_file(&sudo_err.temp_path));
                } else {
                    panic!("Expected SudoSaveRequired error, got: {:?}", e);
                }
            }
            Ok(_) => panic!("Expected error, but save succeeded"),
        }

        Ok(())
    }

    #[test]
    #[cfg(unix)]
    fn test_save_to_unwritable_directory() -> anyhow::Result<()> {
        // Root ignores permission bits, so the scenario cannot be set up.
        if unsafe { libc::getuid() } == 0 {
            eprintln!("Skipping test: root bypasses file permission checks");
            return Ok(());
        }
        use std::fs::Permissions;
        use std::os::unix::fs::PermissionsExt;
        use tempfile::TempDir;

        let temp_dir = TempDir::new().unwrap();
        let unwritable_dir = temp_dir.path().join("unwritable_dir");
        std::fs::create_dir(&unwritable_dir)?;

        // The destination file does not exist yet; only its directory does.
        let file_path = unwritable_dir.join("test.txt");

        std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;

        let mut buffer = TextBuffer::from_bytes(b"content".to_vec(), test_fs());
        let result = buffer.save_to_file(&file_path);

        match result {
            Err(e) => {
                if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
                    assert_eq!(sudo_err.dest_path, file_path);
                    assert!(sudo_err.temp_path.exists());
                    // With the target directory unwritable, the temp file
                    // must have been placed in the system temp dir instead.
                    assert!(sudo_err.temp_path.starts_with(std::env::temp_dir()));
                    drop(std::fs::remove_file(&sudo_err.temp_path));
                } else {
                    panic!("Expected SudoSaveRequired error, got: {:?}", e);
                }
            }
            Ok(_) => panic!("Expected error, but save succeeded"),
        }

        Ok(())
    }
}
5605
/// Tests for the "this encoding needs the whole file loaded" confirmation
/// flow (`LargeFileEncodingConfirmation`).
mod large_file_encoding_tests {
    use super::*;

    #[test]
    fn test_large_file_encoding_confirmation_display() {
        let confirmation = LargeFileEncodingConfirmation {
            path: PathBuf::from("/test/file.txt"),
            file_size: 150 * 1024 * 1024, // 150 MB
            encoding: Encoding::ShiftJis,
        };

        // Display must mention the size in MB, the encoding's display name,
        // and the reason for the prompt.
        let display = format!("{}", confirmation);
        assert!(display.contains("150 MB"), "Display: {}", display);
        assert!(display.contains("Shift-JIS"), "Display: {}", display);
        assert!(
            display.contains("requires full load"),
            "Display: {}",
            display
        );
    }

    #[test]
    fn test_large_file_encoding_confirmation_equality() {
        // PartialEq compares all fields; differing paths break equality.
        let a = LargeFileEncodingConfirmation {
            path: PathBuf::from("/test/file.txt"),
            file_size: 100 * 1024 * 1024,
            encoding: Encoding::Gb18030,
        };
        let b = LargeFileEncodingConfirmation {
            path: PathBuf::from("/test/file.txt"),
            file_size: 100 * 1024 * 1024,
            encoding: Encoding::Gb18030,
        };
        let c = LargeFileEncodingConfirmation {
            path: PathBuf::from("/test/other.txt"),
            file_size: 100 * 1024 * 1024,
            encoding: Encoding::Gb18030,
        };

        assert_eq!(a, b);
        assert_ne!(a, c);
    }

    #[test]
    fn test_encoding_requires_confirmation() {
        // UTF/Latin-style encodings can be handled without loading the
        // whole file up front...
        assert!(!Encoding::Utf8.requires_full_file_load());
        assert!(!Encoding::Utf8Bom.requires_full_file_load());
        assert!(!Encoding::Ascii.requires_full_file_load());
        assert!(!Encoding::Latin1.requires_full_file_load());
        assert!(!Encoding::Windows1252.requires_full_file_load());
        assert!(!Encoding::Utf16Le.requires_full_file_load());
        assert!(!Encoding::Utf16Be.requires_full_file_load());

        // ...while these legacy multi-byte CJK encodings require a full
        // load (presumably because chunk boundaries can split characters —
        // confirm against `requires_full_file_load`).
        assert!(Encoding::Gb18030.requires_full_file_load());
        assert!(Encoding::Gbk.requires_full_file_load());
        assert!(Encoding::ShiftJis.requires_full_file_load());
        assert!(Encoding::EucKr.requires_full_file_load());
    }

    #[test]
    fn test_check_large_file_encoding_small_file() {
        use tempfile::NamedTempFile;

        let temp = NamedTempFile::new().unwrap();
        std::fs::write(temp.path(), b"hello world").unwrap();

        // Small files never need the confirmation prompt.
        let result = TextBuffer::check_large_file_encoding(temp.path(), test_fs()).unwrap();
        assert!(
            result.is_none(),
            "Small files should not require confirmation"
        );
    }

    #[test]
    fn test_large_file_encoding_error_downcast() {
        // The confirmation converts into anyhow::Error and is recovered by
        // downcasting — this is how callers detect the condition.
        let confirmation = LargeFileEncodingConfirmation {
            path: PathBuf::from("/test/file.txt"),
            file_size: 200 * 1024 * 1024,
            encoding: Encoding::EucKr,
        };

        let error: anyhow::Error = confirmation.clone().into();
        let downcast = error.downcast_ref::<LargeFileEncodingConfirmation>();
        assert!(downcast.is_some());
        assert_eq!(downcast.unwrap().encoding, Encoding::EucKr);
    }
}
5697
5698 mod rebuild_pristine_saved_root_tests {
5699 use super::*;
5700 use crate::model::piece_tree::BufferLocation;
5701 use std::sync::Arc;
5702
        /// Builds a large-file `TextBuffer` holding `content` in a single
        /// *loaded* Stored buffer, with one piece spanning it and line-feed
        /// counts deliberately left unknown (`None`), mimicking the state of a
        /// freshly opened large file before any line scan has run.
        fn large_file_buffer(content: &[u8]) -> TextBuffer {
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(crate::model::filesystem::StdFileSystem);
            let bytes = content.len();
            let buffer =
                crate::model::piece_tree::StringBuffer::new_loaded(0, content.to_vec(), false);
            // Empty input gets an empty tree; otherwise one piece covers the
            // whole stored buffer with no line-feed count yet.
            let piece_tree = if bytes > 0 {
                crate::model::piece_tree::PieceTree::new(BufferLocation::Stored(0), 0, bytes, None)
            } else {
                crate::model::piece_tree::PieceTree::empty()
            };
            // The freshly built root doubles as the pristine saved root.
            let saved_root = piece_tree.root();
            TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: None,
                modified: false,
                recovery_pending: false,
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(bytes),
                version: 0,
                config: BufferConfig::default(),
            }
        }
5738
5739 fn scan_line_feeds(buf: &mut TextBuffer) -> Vec<(usize, usize)> {
5741 buf.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
5742 let leaves = buf.piece_tree.get_leaves();
5743 let mut updates = Vec::new();
5744 for (idx, leaf) in leaves.iter().enumerate() {
5745 if leaf.line_feed_cnt.is_some() {
5746 continue;
5747 }
5748 let count = buf.scan_leaf(leaf).unwrap();
5749 updates.push((idx, count));
5750 }
5751 updates
5752 }
5753
5754 fn make_content(size: usize) -> Vec<u8> {
5756 let line = b"abcdefghij0123456789ABCDEFGHIJ0123456789abcdefghij0123456789ABCDEFGHIJ\n";
5757 let mut out = Vec::with_capacity(size);
5758 while out.len() < size {
5759 let remaining = size - out.len();
5760 let take = remaining.min(line.len());
5761 out.extend_from_slice(&line[..take]);
5762 }
5763 out
5764 }
5765
5766 #[test]
5767 fn test_no_edits_arc_ptr_eq() {
5768 let content = make_content(2 * 1024 * 1024);
5769 let expected_lf = content.iter().filter(|&&b| b == b'\n').count();
5770 let mut buf = large_file_buffer(&content);
5771
5772 assert!(buf.line_count().is_none());
5774
5775 let updates = scan_line_feeds(&mut buf);
5776 buf.rebuild_with_pristine_saved_root(&updates);
5777
5778 assert_eq!(buf.line_count(), Some(expected_lf + 1));
5780
5781 assert!(Arc::ptr_eq(&buf.saved_root, &buf.piece_tree.root()));
5783 let diff = buf.diff_since_saved();
5784 assert!(diff.equal);
5785 assert!(buf.line_feeds_scanned);
5786 assert_eq!(buf.get_all_text().unwrap(), content);
5787 }
5788
5789 #[test]
5790 fn test_single_insertion() {
5791 let content = make_content(2 * 1024 * 1024);
5792 let mut buf = large_file_buffer(&content);
5793 let updates = scan_line_feeds(&mut buf);
5794
5795 let insert_offset = 1_000_000;
5797 let insert_text = b"INSERTED_TEXT\n";
5798 buf.insert_bytes(insert_offset, insert_text.to_vec());
5799
5800 buf.rebuild_with_pristine_saved_root(&updates);
5801
5802 let mut expected = content.clone();
5804 expected.splice(insert_offset..insert_offset, insert_text.iter().copied());
5805 assert_eq!(buf.get_all_text().unwrap(), expected);
5806
5807 let expected_lf = expected.iter().filter(|&&b| b == b'\n').count();
5809 assert_eq!(buf.line_count(), Some(expected_lf + 1));
5810
5811 let diff = buf.diff_since_saved();
5813 assert!(!diff.equal);
5814 assert!(!diff.byte_ranges.is_empty());
5815 }
5816
        /// After an insert near EOF, both `byte_ranges` and `line_ranges` in
        /// the diff must be expressed in document-absolute coordinates (near
        /// the end of the document), not relative to some subtree.
        #[test]
        fn test_diff_line_ranges_are_document_absolute_after_eof_insert() {
            let content = make_content(4 * 1024 * 1024); let total_lf = content.iter().filter(|&&b| b == b'\n').count();
            let mut buf = large_file_buffer(&content);
            let updates = scan_line_feeds(&mut buf);
            buf.rebuild_with_pristine_saved_root(&updates);

            // Insert 100 bytes before the end of the file.
            let insert_offset = content.len() - 100;
            buf.insert_bytes(insert_offset, b"HELLO".to_vec());

            let diff = buf.diff_since_saved();
            assert!(!diff.equal, "diff should detect the insertion");
            assert!(
                !diff.byte_ranges.is_empty(),
                "byte_ranges should not be empty"
            );

            // The reported byte range must sit near EOF in absolute terms.
            let first_range = &diff.byte_ranges[0];
            assert!(
                first_range.start >= content.len() - 200,
                "byte_ranges should be document-absolute (near EOF): got {:?}, expected near {}",
                first_range,
                insert_offset,
            );

            let line_ranges = diff
                .line_ranges
                .as_ref()
                .expect("line_ranges should be Some");
            assert!(!line_ranges.is_empty(), "line_ranges should not be empty");
            let first_lr = &line_ranges[0];
            // Within ~10 lines of the document's last line.
            let expected_min_line = total_lf.saturating_sub(10);
            assert!(
                first_lr.start >= expected_min_line,
                "line_ranges should be document-absolute: got {:?}, expected start >= {} (total lines ~{})",
                first_lr,
                expected_min_line,
                total_lf,
            );
        }
5866
5867 #[test]
5868 fn test_single_deletion() {
5869 let content = make_content(2 * 1024 * 1024);
5870 let mut buf = large_file_buffer(&content);
5871 let updates = scan_line_feeds(&mut buf);
5872
5873 let del_start = 500_000;
5875 let del_len = 1000;
5876 buf.delete_bytes(del_start, del_len);
5877
5878 buf.rebuild_with_pristine_saved_root(&updates);
5879
5880 let mut expected = content.clone();
5881 expected.drain(del_start..del_start + del_len);
5882 assert_eq!(buf.get_all_text().unwrap(), expected);
5883
5884 let diff = buf.diff_since_saved();
5885 assert!(!diff.equal);
5886 }
5887
5888 #[test]
5889 fn test_insert_and_delete() {
5890 let content = make_content(2 * 1024 * 1024);
5891 let mut buf = large_file_buffer(&content);
5892 let updates = scan_line_feeds(&mut buf);
5893
5894 let del_start = 100_000;
5896 let del_len = 500;
5897 buf.delete_bytes(del_start, del_len);
5898
5899 let insert_offset = 1_500_000; let insert_text = b"NEW_CONTENT\n";
5901 buf.insert_bytes(insert_offset, insert_text.to_vec());
5902
5903 buf.rebuild_with_pristine_saved_root(&updates);
5904
5905 let mut expected = content.clone();
5907 expected.drain(del_start..del_start + del_len);
5908 expected.splice(insert_offset..insert_offset, insert_text.iter().copied());
5909 assert_eq!(buf.get_all_text().unwrap(), expected);
5910
5911 let diff = buf.diff_since_saved();
5912 assert!(!diff.equal);
5913 }
5914
5915 #[test]
5916 fn test_multiple_scattered_edits() {
5917 let content = make_content(3 * 1024 * 1024);
5918 let mut buf = large_file_buffer(&content);
5919 let updates = scan_line_feeds(&mut buf);
5920 let mut expected = content.clone();
5921
5922 buf.delete_bytes(100_000, 200);
5925 expected.drain(100_000..100_200);
5926
5927 buf.insert_bytes(500_000, b"AAAA\n".to_vec());
5929 expected.splice(500_000..500_000, b"AAAA\n".iter().copied());
5930
5931 buf.delete_bytes(2_000_000, 300);
5933 expected.drain(2_000_000..2_000_300);
5934
5935 buf.insert_bytes(1_000_000, b"BBBB\n".to_vec());
5937 expected.splice(1_000_000..1_000_000, b"BBBB\n".iter().copied());
5938
5939 buf.rebuild_with_pristine_saved_root(&updates);
5940
5941 assert_eq!(buf.get_all_text().unwrap(), expected);
5942 let diff = buf.diff_since_saved();
5943 assert!(!diff.equal);
5944 }
5945
5946 #[test]
5947 fn test_content_preserved_after_rebuild() {
5948 let content = make_content(2 * 1024 * 1024);
5951 let mut buf = large_file_buffer(&content);
5952 let updates = scan_line_feeds(&mut buf);
5953
5954 buf.insert_bytes(0, b"HEADER\n".to_vec());
5955 buf.delete_bytes(1_000_000, 500);
5956
5957 let text_before = buf.get_all_text().unwrap();
5958 buf.rebuild_with_pristine_saved_root(&updates);
5959 let text_after = buf.get_all_text().unwrap();
5960
5961 assert_eq!(text_before, text_after);
5962 }
5963
        /// Builds a large-file `TextBuffer` whose single Stored buffer is
        /// *unloaded*: it records only `path` and `file_size`, so bytes are
        /// fetched from disk on demand. Line-feed counts start unknown.
        fn large_file_buffer_unloaded(path: &std::path::Path, file_size: usize) -> TextBuffer {
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(crate::model::filesystem::StdFileSystem);
            let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
                0,
                path.to_path_buf(),
                0,
                file_size,
            );
            // Empty file gets an empty tree; otherwise one piece covers the
            // whole stored buffer with no line-feed count (None) yet.
            let piece_tree = if file_size > 0 {
                crate::model::piece_tree::PieceTree::new(
                    BufferLocation::Stored(0),
                    0,
                    file_size,
                    None,
                )
            } else {
                crate::model::piece_tree::PieceTree::empty()
            };
            // The freshly built root doubles as the pristine saved root.
            let saved_root = piece_tree.root();
            TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: Some(path.to_path_buf()),
                modified: false,
                recovery_pending: false,
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(file_size),
                version: 0,
                config: BufferConfig::default(),
            }
        }
6007
6008 #[test]
6009 fn test_unloaded_buffer_no_edits_line_count() {
6010 let content = make_content(2 * 1024 * 1024);
6011 let expected_lf = content.iter().filter(|&&b| b == b'\n').count();
6012
6013 let tmp = tempfile::NamedTempFile::new().unwrap();
6014 std::fs::write(tmp.path(), &content).unwrap();
6015 let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6016
6017 assert!(
6018 buf.line_count().is_none(),
6019 "before scan, line_count should be None"
6020 );
6021
6022 let updates = scan_line_feeds(&mut buf);
6023 buf.rebuild_with_pristine_saved_root(&updates);
6024
6025 assert_eq!(
6026 buf.line_count(),
6027 Some(expected_lf + 1),
6028 "after rebuild, line_count must be exact"
6029 );
6030 assert!(buf.line_feeds_scanned);
6031 }
6032
6033 #[test]
6034 fn test_unloaded_buffer_with_edits_line_count() {
6035 let content = make_content(2 * 1024 * 1024);
6036
6037 let tmp = tempfile::NamedTempFile::new().unwrap();
6038 std::fs::write(tmp.path(), &content).unwrap();
6039 let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6040
6041 let updates = scan_line_feeds(&mut buf);
6042
6043 let insert_text = b"INSERTED\n";
6045 buf.insert_bytes(1_000_000, insert_text.to_vec());
6046
6047 buf.rebuild_with_pristine_saved_root(&updates);
6048
6049 let mut expected = content.clone();
6050 expected.splice(1_000_000..1_000_000, insert_text.iter().copied());
6051 let expected_lf = expected.iter().filter(|&&b| b == b'\n').count();
6052
6053 assert_eq!(
6054 buf.line_count(),
6055 Some(expected_lf + 1),
6056 "after rebuild with edits, line_count must be exact"
6057 );
6058 assert!(buf.line_feeds_scanned);
6059 }
6060
6061 #[test]
6066 fn test_diff_efficiency_after_rebuild() {
6067 let content = make_content(32 * 1024 * 1024);
6070 let mut buf = large_file_buffer(&content);
6071
6072 let updates = scan_line_feeds(&mut buf);
6073
6074 buf.insert_bytes(1_000_000, b"HELLO".to_vec());
6076
6077 buf.rebuild_with_pristine_saved_root(&updates);
6078
6079 let diff = buf.diff_since_saved();
6080 assert!(!diff.equal);
6081
6082 let total_leaves = buf.piece_tree.get_leaves().len();
6083 assert!(
6088 diff.nodes_visited < total_leaves,
6089 "diff visited {} nodes but tree has {} leaves — \
6090 Arc::ptr_eq short-circuiting is not working",
6091 diff.nodes_visited,
6092 total_leaves,
6093 );
6094 }
6095
        /// After a rebuild, loading a small viewport from an unloaded Stored
        /// buffer must fetch at most one chunk (`LOAD_CHUNK_SIZE`) from disk —
        /// not the whole file. A wrapper filesystem records the largest
        /// `read_range` request to prove it.
        #[test]
        fn test_viewport_load_after_rebuild_does_not_load_entire_file() {
            use std::sync::atomic::{AtomicUsize, Ordering};

            // Delegates everything to StdFileSystem but tracks the largest
            // `read_range` length ever requested.
            struct TrackingFs {
                inner: crate::model::filesystem::StdFileSystem,
                max_read_range_len: Arc<AtomicUsize>,
            }

            impl crate::model::filesystem::FileSystem for TrackingFs {
                fn read_file(&self, path: &Path) -> std::io::Result<Vec<u8>> {
                    self.inner.read_file(path)
                }
                // The only instrumented method: record `len`, then delegate.
                fn read_range(
                    &self,
                    path: &Path,
                    offset: u64,
                    len: usize,
                ) -> std::io::Result<Vec<u8>> {
                    self.max_read_range_len.fetch_max(len, Ordering::SeqCst);
                    self.inner.read_range(path, offset, len)
                }
                fn write_file(&self, path: &Path, data: &[u8]) -> std::io::Result<()> {
                    self.inner.write_file(path, data)
                }
                fn create_file(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
                {
                    self.inner.create_file(path)
                }
                fn open_file(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileReader>>
                {
                    self.inner.open_file(path)
                }
                fn open_file_for_write(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
                {
                    self.inner.open_file_for_write(path)
                }
                fn open_file_for_append(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
                {
                    self.inner.open_file_for_append(path)
                }
                fn set_file_length(&self, path: &Path, len: u64) -> std::io::Result<()> {
                    self.inner.set_file_length(path, len)
                }
                fn rename(&self, from: &Path, to: &Path) -> std::io::Result<()> {
                    self.inner.rename(from, to)
                }
                fn copy(&self, from: &Path, to: &Path) -> std::io::Result<u64> {
                    self.inner.copy(from, to)
                }
                fn remove_file(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.remove_file(path)
                }
                fn remove_dir(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.remove_dir(path)
                }
                fn metadata(
                    &self,
                    path: &Path,
                ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
                    self.inner.metadata(path)
                }
                fn symlink_metadata(
                    &self,
                    path: &Path,
                ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
                    self.inner.symlink_metadata(path)
                }
                fn is_dir(&self, path: &Path) -> std::io::Result<bool> {
                    self.inner.is_dir(path)
                }
                fn is_file(&self, path: &Path) -> std::io::Result<bool> {
                    self.inner.is_file(path)
                }
                fn set_permissions(
                    &self,
                    path: &Path,
                    permissions: &crate::model::filesystem::FilePermissions,
                ) -> std::io::Result<()> {
                    self.inner.set_permissions(path, permissions)
                }
                fn is_owner(&self, path: &Path) -> bool {
                    self.inner.is_owner(path)
                }
                fn read_dir(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Vec<crate::model::filesystem::DirEntry>> {
                    self.inner.read_dir(path)
                }
                fn create_dir(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.create_dir(path)
                }
                fn create_dir_all(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.create_dir_all(path)
                }
                fn canonicalize(&self, path: &Path) -> std::io::Result<PathBuf> {
                    self.inner.canonicalize(path)
                }
                fn current_uid(&self) -> u32 {
                    self.inner.current_uid()
                }
                fn sudo_write(
                    &self,
                    path: &Path,
                    data: &[u8],
                    mode: u32,
                    uid: u32,
                    gid: u32,
                ) -> std::io::Result<()> {
                    self.inner.sudo_write(path, data, mode, uid, gid)
                }
            }

            // Three chunks' worth of data, written to a real temp file.
            let file_size = LOAD_CHUNK_SIZE * 3;
            let content = make_content(file_size);

            let tmp = tempfile::NamedTempFile::new().unwrap();
            std::fs::write(tmp.path(), &content).unwrap();

            let max_read = Arc::new(AtomicUsize::new(0));
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(TrackingFs {
                    inner: crate::model::filesystem::StdFileSystem,
                    max_read_range_len: max_read.clone(),
                });

            // Unloaded Stored buffer: nothing is in memory until requested.
            let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
                0,
                tmp.path().to_path_buf(),
                0,
                file_size,
            );
            let piece_tree = PieceTree::new(BufferLocation::Stored(0), 0, file_size, None);
            let saved_root = piece_tree.root();
            let mut buf = TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: Some(tmp.path().to_path_buf()),
                modified: false,
                recovery_pending: false,
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(file_size),
                version: 0,
                config: BufferConfig::default(),
            };

            // Prime a viewport inside the second chunk, then scan and rebuild.
            let viewport_offset = LOAD_CHUNK_SIZE + 100; buf.get_text_range_mut(viewport_offset, 4096).unwrap();

            let updates = scan_line_feeds(&mut buf);
            buf.rebuild_with_pristine_saved_root(&updates);

            // Reset the high-water mark; only post-rebuild reads count.
            max_read.store(0, Ordering::SeqCst);

            buf.get_text_range_mut(viewport_offset, 4096).unwrap();

            let largest_read = max_read.load(Ordering::SeqCst);
            assert!(
                largest_read <= LOAD_CHUNK_SIZE,
                "After rebuild, loading a viewport triggered a read of {} bytes \
                 (file_size={}). This means the entire Stored buffer is being \
                 loaded instead of just the needed chunk.",
                largest_read,
                file_size,
            );
        }
6297
6298 #[test]
6304 fn test_viewport_load_after_rebuild_preserves_line_counts() {
6305 let file_size = LOAD_CHUNK_SIZE * 3;
6306 let content = make_content(file_size);
6307
6308 let tmp = tempfile::NamedTempFile::new().unwrap();
6309 std::fs::write(tmp.path(), &content).unwrap();
6310 let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6311
6312 let updates = scan_line_feeds(&mut buf);
6314 buf.rebuild_with_pristine_saved_root(&updates);
6315
6316 let line_count_before = buf.piece_tree.line_count();
6317 assert!(
6318 line_count_before.is_some(),
6319 "line_count must be Some after rebuild"
6320 );
6321
6322 let mid_piece_offset = LOAD_CHUNK_SIZE + LOAD_CHUNK_SIZE / 2;
6325 buf.get_text_range_mut(mid_piece_offset, 4096).unwrap();
6326
6327 let line_count_after = buf.piece_tree.line_count();
6328 assert!(
6329 line_count_after.is_some(),
6330 "line_count must still be Some after viewport load \
6331 (was {:?} before, now {:?})",
6332 line_count_before,
6333 line_count_after,
6334 );
6335 assert_eq!(
6336 line_count_before, line_count_after,
6337 "line_count must not change after viewport load"
6338 );
6339 }
6340
6341 #[test]
6343 fn test_diff_efficiency_after_rebuild_unloaded() {
6344 let content = make_content(32 * 1024 * 1024);
6345
6346 let tmp = tempfile::NamedTempFile::new().unwrap();
6347 std::fs::write(tmp.path(), &content).unwrap();
6348 let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6349
6350 let updates = scan_line_feeds(&mut buf);
6351
6352 buf.insert_bytes(1_000_000, b"HELLO".to_vec());
6353
6354 buf.rebuild_with_pristine_saved_root(&updates);
6355
6356 let diff = buf.diff_since_saved();
6357 assert!(!diff.equal);
6358
6359 let total_leaves = buf.piece_tree.get_leaves().len();
6360 assert!(
6361 diff.nodes_visited < total_leaves,
6362 "diff visited {} nodes but tree has {} leaves — \
6363 Arc::ptr_eq short-circuiting is not working (unloaded path)",
6364 diff.nodes_visited,
6365 total_leaves,
6366 );
6367 }
6368 }
6369}
6370
6371#[cfg(test)]
6372mod property_tests {
6373 use crate::model::filesystem::StdFileSystem;
6374 use std::sync::Arc;
6375
    /// Shared `FileSystem` handle for these property tests, backed by the
    /// real standard filesystem implementation.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }
6379 use super::*;
6380 use proptest::prelude::*;
6381
    /// Proptest strategy: 0..100 bytes drawn from lowercase ASCII letters
    /// and '\n', so generated texts have a realistic mix of line breaks.
    fn text_with_newlines() -> impl Strategy<Value = Vec<u8>> {
        prop::collection::vec(
            prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n'),],
            0..100,
        )
    }
6389
    /// A randomly generated edit, applied both to the buffer under test and
    /// to a plain `Vec<u8>` mirror for comparison.
    #[derive(Debug, Clone)]
    enum Operation {
        // Insert `text` at `offset` (callers clamp offset to buffer length).
        Insert { offset: usize, text: Vec<u8> },
        // Delete up to `bytes` bytes starting at `offset` (callers clamp).
        Delete { offset: usize, bytes: usize },
    }
6396
    /// Strategy producing 0..50 random `Operation`s with offsets in 0..200,
    /// insert payloads from `text_with_newlines`, and delete lengths 1..50.
    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
        prop::collection::vec(
            prop_oneof![
                (0usize..200, text_with_newlines())
                    .prop_map(|(offset, text)| { Operation::Insert { offset, text } }),
                (0usize..200, 1usize..50)
                    .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
            ],
            0..50,
        )
    }
6408
    // Randomized invariants of TextBuffer: size bookkeeping, line counting,
    // offset/position round-trips, and write-recipe correctness. Each property
    // clamps generated offsets/lengths into range before applying them.
    proptest! {
        // line_count is always newline-count + 1 for a freshly built buffer.
        #[test]
        fn prop_line_count_consistent(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let newline_count = text.iter().filter(|&&b| b == b'\n').count();
            prop_assert_eq!(buffer.line_count(), Some(newline_count + 1));
        }

        // from_bytes → get_all_text is the identity.
        #[test]
        fn prop_get_all_text_matches_original(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // Inserting N bytes grows total_bytes by exactly N.
        #[test]
        fn prop_insert_increases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let initial_bytes = buffer.total_bytes();

            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());

            prop_assert_eq!(buffer.total_bytes(), initial_bytes + insert_text.len());
        }

        // Deleting N bytes shrinks total_bytes by exactly N.
        #[test]
        fn prop_delete_decreases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            delete_bytes in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let initial_bytes = buffer.total_bytes();

            let offset = offset.min(buffer.total_bytes());
            let delete_bytes = delete_bytes.min(buffer.total_bytes() - offset);

            if delete_bytes == 0 {
                return Ok(());
            }

            buffer.delete_bytes(offset, delete_bytes);

            prop_assert_eq!(buffer.total_bytes(), initial_bytes - delete_bytes);
        }

        // Inserting then deleting the same span restores the original text.
        #[test]
        fn prop_insert_then_delete_restores_original(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());
            buffer.delete_bytes(offset, insert_text.len());

            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // offset_to_position followed by position_to_offset is the identity
        // for every valid offset.
        #[test]
        fn prop_offset_position_roundtrip(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            for offset in 0..text.len() {
                let pos = buffer.offset_to_position(offset).expect("offset_to_position should succeed for valid offset");
                let back = buffer.position_to_offset(pos);
                prop_assert_eq!(back, offset, "Failed roundtrip for offset {}", offset);
            }
        }

        // get_text_range returns exactly the requested slice.
        #[test]
        fn prop_get_text_range_valid(
            text in text_with_newlines(),
            offset in 0usize..100,
            length in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let offset = offset.min(buffer.total_bytes());
            let length = length.min(buffer.total_bytes() - offset);

            if length == 0 {
                return Ok(());
            }

            let result = buffer.get_text_range(offset, length);
            prop_assert_eq!(result, Some(text[offset..offset + length].to_vec()));
        }

        // Replaying arbitrary edits on a Vec mirror keeps both in sync.
        #[test]
        fn prop_operations_maintain_consistency(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
            let mut expected_text = b"initial\ntext".to_vec();

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text.clone());

                        // Mirror clamps independently (same result in practice).
                        let offset = offset.min(expected_text.len());
                        expected_text.splice(offset..offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let bytes = bytes.min(buffer.total_bytes() - offset);
                            buffer.delete_bytes(offset, bytes);

                            if offset < expected_text.len() {
                                let bytes = bytes.min(expected_text.len() - offset);
                                expected_text.drain(offset..offset + bytes);
                            }
                        }
                    }
                }
            }

            prop_assert_eq!(buffer.get_all_text().unwrap(), expected_text);
        }

        // line_count never drops below 1 (an empty buffer is one line).
        #[test]
        fn prop_line_count_never_zero(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                prop_assert!(buffer.line_count().unwrap_or(1) >= 1);
            }
        }

        // total_bytes stays bounded (would wrap/explode on underflow bugs).
        #[test]
        fn prop_total_bytes_never_negative(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                prop_assert!(buffer.total_bytes() < 10_000_000);
            }
        }

        // Offset↔position conversions stay in range after arbitrary edits.
        #[test]
        fn prop_piece_tree_and_line_index_stay_synced(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                if buffer.total_bytes() > 0 {
                    let mid_offset = buffer.total_bytes() / 2;
                    if let Some(pos) = buffer.offset_to_position(mid_offset) {
                        let back = buffer.position_to_offset(pos);

                        prop_assert!(back <= buffer.total_bytes());
                    }
                }
            }
        }

        // A write recipe for an unedited buffer reproduces its content.
        #[test]
        fn prop_write_recipe_matches_content(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            let output = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(output, text, "Recipe output should match original content");
        }

        // A write recipe after arbitrary edits reproduces the edited content.
        #[test]
        fn prop_write_recipe_after_edits(
            initial_text in text_with_newlines(),
            operations in operation_strategy()
        ) {
            let mut buffer = TextBuffer::from_bytes(initial_text, test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let bytes = bytes.min(buffer.total_bytes() - offset);
                            if bytes > 0 {
                                buffer.delete_bytes(offset, bytes);
                            }
                        }
                    }
                }
            }

            let expected = buffer.get_all_text().unwrap();
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
            let output = apply_recipe(&buffer, &recipe);

            prop_assert_eq!(output, expected, "Recipe output should match buffer content after edits");
        }

        // Recipes for file-backed buffers (which may use Copy actions that
        // read from the source file) still reproduce the edited content.
        #[test]
        fn prop_write_recipe_copy_ops_valid(
            text in prop::collection::vec(prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n')], 10..200),
            edit_offset in 0usize..100,
            edit_text in text_with_newlines()
        ) {
            use tempfile::TempDir;

            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");
            std::fs::write(&file_path, &text).unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 1024 * 1024, test_fs()).unwrap();

            let edit_offset = edit_offset.min(buffer.total_bytes());
            buffer.insert_bytes(edit_offset, edit_text.clone());

            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            let expected = buffer.get_all_text().unwrap();
            let output = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(output, expected, "Recipe with Copy ops should match buffer content");

            // Copy actions are not guaranteed for every input, so their
            // presence is observed but not asserted.
            if text.len() > 100 && edit_offset > 10 {
                let has_copy = recipe.actions.iter().any(|a| matches!(a, RecipeAction::Copy { .. }));
                let _ = has_copy;
            }
        }
    }
6694
6695 fn apply_recipe(buffer: &TextBuffer, recipe: &WriteRecipe) -> Vec<u8> {
6697 let mut output = Vec::new();
6698 for action in &recipe.actions {
6699 match action {
6700 RecipeAction::Copy { offset, len } => {
6701 if let Some(src_path) = &recipe.src_path {
6702 let data = buffer
6703 .fs
6704 .read_range(src_path, *offset, *len as usize)
6705 .expect("read_range should succeed for Copy op");
6706 output.extend_from_slice(&data);
6707 } else {
6708 panic!("Copy action without source path");
6709 }
6710 }
6711 RecipeAction::Insert { index } => {
6712 output.extend_from_slice(&recipe.insert_data[*index]);
6713 }
6714 }
6715 }
6716 output
6717 }
6718
    /// Convenience wrapper: returns the binary flag (second tuple element)
    /// from `TextBuffer::detect_encoding_or_binary`.
    fn is_detected_as_binary(bytes: &[u8]) -> bool {
        TextBuffer::detect_encoding_or_binary(bytes).1
    }
6723
6724 #[test]
6725 fn test_detect_binary_text_files() {
6726 assert!(!is_detected_as_binary(b"Hello, world!"));
6728 assert!(!is_detected_as_binary(b"Line 1\nLine 2\nLine 3"));
6729 assert!(!is_detected_as_binary(b"Tabs\tand\tnewlines\n"));
6730 assert!(!is_detected_as_binary(b"Carriage return\r\n"));
6731
6732 assert!(!is_detected_as_binary(b""));
6734
6735 assert!(!is_detected_as_binary(b"\x1b[31mRed text\x1b[0m"));
6737 }
6738
6739 #[test]
6740 fn test_detect_binary_binary_files() {
6741 assert!(is_detected_as_binary(b"Hello\x00World"));
6743 assert!(is_detected_as_binary(b"\x00"));
6744
6745 assert!(is_detected_as_binary(b"Text with \x01 control char"));
6747 assert!(is_detected_as_binary(b"\x02\x03\x04"));
6748
6749 assert!(is_detected_as_binary(b"Text with DEL\x7F"));
6751 }
6752
6753 #[test]
6754 fn test_detect_binary_png_file() {
6755 let png_header: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
6758 assert!(is_detected_as_binary(png_header));
6759
6760 let mut png_data = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
6762 png_data.extend_from_slice(b"\x00\x00\x00\x0DIHDR"); assert!(is_detected_as_binary(&png_data));
6764 }
6765
6766 #[test]
6767 fn test_detect_binary_other_image_formats() {
6768 let jpeg_header: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
6770 assert!(is_detected_as_binary(jpeg_header));
6771
6772 let gif_data: &[u8] = &[
6775 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, ];
6781 assert!(is_detected_as_binary(gif_data));
6783
6784 let bmp_header: &[u8] = &[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00];
6786 assert!(is_detected_as_binary(bmp_header));
6787 }
6788
6789 #[test]
6790 fn test_detect_binary_executable_formats() {
6791 let elf_header: &[u8] = &[0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00];
6793 assert!(is_detected_as_binary(elf_header));
6794
6795 let macho_header: &[u8] = &[0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
6797 assert!(is_detected_as_binary(macho_header));
6798
6799 let pe_header: &[u8] = &[0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00];
6801 assert!(is_detected_as_binary(pe_header));
6802 }
6803}
6804
/// A single logical line extracted from a `TextBuffer`, as yielded by
/// [`TextBufferLineIterator`].
#[derive(Debug, Clone)]
pub struct LineData {
    /// Byte offset of the start of this line within the buffer.
    pub byte_offset: usize,
    /// Line content, lossily decoded as UTF-8, with any trailing `'\n'` stripped.
    pub content: String,
    /// Whether the line was terminated by a `'\n'` in the buffer
    /// (false for a final line that ends at end-of-buffer).
    pub has_newline: bool,
    /// Line number of this line when the buffer exposes line metadata,
    /// `None` otherwise. (Indexing base follows `Position::line` — not
    /// established in this file section; confirm against `offset_to_position`.)
    pub line_number: Option<usize>,
}
6817
/// Iterator over lines of a `TextBuffer`.
///
/// Lines are collected eagerly at construction time (up to a caller-supplied
/// maximum); iteration then yields clones from that collected batch.
pub struct TextBufferLineIterator {
    // Lines materialized up front by `TextBufferLineIterator::new`.
    lines: Vec<LineData>,
    // Index into `lines` of the next item to yield.
    current_index: usize,
    /// True when the buffer still contains bytes past the last collected
    /// line, i.e. another batch could be fetched after this one.
    pub has_more: bool,
}
6828
6829impl TextBufferLineIterator {
6830 pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
6831 let buffer_len = buffer.len();
6832 if byte_pos >= buffer_len {
6833 return Ok(Self {
6834 lines: Vec::new(),
6835 current_index: 0,
6836 has_more: false,
6837 });
6838 }
6839
6840 let has_line_metadata = buffer.line_count().is_some();
6842
6843 let mut current_line = if has_line_metadata {
6846 buffer.offset_to_position(byte_pos).map(|pos| pos.line)
6847 } else {
6848 None
6849 };
6850
6851 let mut lines = Vec::with_capacity(max_lines);
6852 let mut current_offset = byte_pos;
6853 let estimated_line_length = 80; for _ in 0..max_lines {
6857 if current_offset >= buffer_len {
6858 break;
6859 }
6860
6861 let line_start = current_offset;
6862 let line_number = current_line;
6863
6864 let estimated_max_line_length = estimated_line_length * 3;
6866 let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);
6867
6868 let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;
6870
6871 let mut line_len = 0;
6873 let mut found_newline = false;
6874 for &byte in chunk.iter() {
6875 line_len += 1;
6876 if byte == b'\n' {
6877 found_newline = true;
6878 break;
6879 }
6880 }
6881
6882 if !found_newline && current_offset + line_len < buffer_len {
6884 let remaining = buffer_len - current_offset - line_len;
6886 let additional_bytes = estimated_max_line_length.min(remaining);
6887 let more_chunk =
6888 buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;
6889
6890 let mut extended_chunk = chunk;
6891 extended_chunk.extend_from_slice(&more_chunk);
6892
6893 for &byte in more_chunk.iter() {
6894 line_len += 1;
6895 if byte == b'\n' {
6896 found_newline = true;
6897 break;
6898 }
6899 }
6900
6901 let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
6902 let has_newline = line_string.ends_with('\n');
6903 let content = if has_newline {
6904 line_string[..line_string.len() - 1].to_string()
6905 } else {
6906 line_string
6907 };
6908
6909 lines.push(LineData {
6910 byte_offset: line_start,
6911 content,
6912 has_newline,
6913 line_number,
6914 });
6915
6916 current_offset += line_len;
6917 if has_line_metadata && found_newline {
6918 current_line = current_line.map(|n| n + 1);
6919 }
6920 continue;
6921 }
6922
6923 let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
6925 let has_newline = line_string.ends_with('\n');
6926 let content = if has_newline {
6927 line_string[..line_string.len() - 1].to_string()
6928 } else {
6929 line_string
6930 };
6931
6932 lines.push(LineData {
6933 byte_offset: line_start,
6934 content,
6935 has_newline,
6936 line_number,
6937 });
6938
6939 current_offset += line_len;
6940 if has_line_metadata && found_newline {
6942 current_line = current_line.map(|n| n + 1);
6943 }
6944 }
6945
6946 let has_more = current_offset < buffer_len;
6948
6949 Ok(Self {
6950 lines,
6951 current_index: 0,
6952 has_more,
6953 })
6954 }
6955}
6956
6957impl Iterator for TextBufferLineIterator {
6958 type Item = LineData;
6959
6960 fn next(&mut self) -> Option<Self::Item> {
6961 if self.current_index < self.lines.len() {
6962 let line = self.lines[self.current_index].clone();
6963 self.current_index += 1;
6964 Some(line)
6965 } else {
6966 None
6967 }
6968 }
6969}