1use crate::model::encoding;
4use crate::model::filesystem::{
5 FileMetadata, FileSearchCursor, FileSearchOptions, FileSystem, WriteOp,
6};
7use crate::model::piece_tree::{
8 BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, PieceView, Position,
9 StringBuffer, TreeStats,
10};
11use crate::model::piece_tree_diff::PieceTreeDiff;
12use crate::primitives::grapheme;
13use anyhow::{Context, Result};
14use regex::bytes::Regex;
15use std::io::{self, Write};
16use std::ops::Range;
17use std::path::{Path, PathBuf};
18use std::sync::Arc;
19
20pub use encoding::Encoding;
22
/// Error raised when a save hit a permission error and has to be completed
/// with elevated privileges.
///
/// When this error is produced the content has already been written to
/// `temp_path`; the other fields describe where it belongs and which
/// ownership/mode was recorded for the destination.
#[derive(Debug, Clone, PartialEq)]
pub struct SudoSaveRequired {
    /// Temporary file that holds the content to be saved.
    pub temp_path: PathBuf,
    /// File the content was meant to be written to.
    pub dest_path: PathBuf,
    /// Owner user id recorded for the destination (0 when unavailable).
    pub uid: u32,
    /// Owner group id recorded for the destination (0 when unavailable).
    pub gid: u32,
    /// Permission bits recorded for the destination (0 when unavailable).
    pub mode: u32,
}

impl std::fmt::Display for SudoSaveRequired {
    /// Renders the user-facing message naming the destination path.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let destination = self.dest_path.display();
        f.write_fmt(format_args!(
            "Permission denied saving to {}. Use sudo to complete the operation.",
            destination
        ))
    }
}

impl std::error::Error for SudoSaveRequired {}
52
53#[derive(Debug, Clone, PartialEq)]
60pub struct LargeFileEncodingConfirmation {
61 pub path: PathBuf,
63 pub file_size: usize,
65 pub encoding: Encoding,
67}
68
69impl std::fmt::Display for LargeFileEncodingConfirmation {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 let size_mb = self.file_size as f64 / (1024.0 * 1024.0);
72 write!(
73 f,
74 "{} ({:.0} MB) requires full load. (l)oad, (e)ncoding, (C)ancel? ",
75 self.encoding.display_name(),
76 size_mb
77 )
78 }
79}
80
81impl std::error::Error for LargeFileEncodingConfirmation {}
82
/// One chunk of work for an incremental scan over the buffer.
///
/// NOTE(review): the code that produces and consumes these chunks is outside
/// this view; the field notes below are inferred from names — confirm.
#[derive(Debug, Clone)]
pub struct LineScanChunk {
    /// Index of the piece-tree leaf this chunk refers to (presumably).
    pub leaf_index: usize,
    /// Number of bytes covered by this chunk.
    pub byte_len: usize,
    /// Presumably true when the information the scan would compute for this
    /// chunk is already available — confirm against the scanner.
    pub already_known: bool,
}
93
94pub use crate::model::filesystem::SearchMatch;
98
99#[derive(Debug)]
113pub struct ChunkedSearchState {
114 pub chunks: Vec<LineScanChunk>,
116 pub next_chunk: usize,
118 pub next_doc_offset: usize,
120 pub total_bytes: usize,
122 pub scanned_bytes: usize,
124 pub regex: regex::bytes::Regex,
126 pub matches: Vec<SearchMatch>,
128 pub overlap_tail: Vec<u8>,
130 pub overlap_doc_offset: usize,
132 pub max_matches: usize,
134 pub capped: bool,
136 pub query_len: usize,
138 pub(crate) running_line: usize,
141}
142
143impl ChunkedSearchState {
144 pub fn is_done(&self) -> bool {
146 self.next_chunk >= self.chunks.len() || self.capped
147 }
148
149 pub fn progress_percent(&self) -> usize {
151 if self.total_bytes > 0 {
152 (self.scanned_bytes * 100) / self.total_bytes
153 } else {
154 100
155 }
156 }
157}
158
/// Default size threshold (100 MiB). Files at least this large are opened
/// through the large-file loading path when no explicit threshold is given.
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// 1 MiB.
/// NOTE(review): presumably the size of the chunks loaded on demand from a
/// large file — the users of this constant are outside this view; confirm.
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// 64 KiB.
/// NOTE(review): presumably the boundary to which chunk offsets are aligned —
/// the users of this constant are outside this view; confirm.
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
168
/// Tunable parameters of a text buffer.
#[derive(Debug, Clone)]
pub struct BufferConfig {
    /// Assumed average line length in bytes.
    /// NOTE(review): presumably used to estimate line positions when exact
    /// line data is unavailable — confirm against the users of this field.
    pub estimated_line_length: usize,
}

impl Default for BufferConfig {
    /// Default configuration: an estimated line length of 80 bytes.
    fn default() -> Self {
        const DEFAULT_ESTIMATED_LINE_LENGTH: usize = 80;

        BufferConfig {
            estimated_line_length: DEFAULT_ESTIMATED_LINE_LENGTH,
        }
    }
}
184
/// Line terminator convention of a buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// Line feed (`\n`); the default.
    #[default]
    LF,
    /// Carriage return followed by line feed (`\r\n`).
    CRLF,
    /// Carriage return only (`\r`).
    CR,
}

impl LineEnding {
    /// Returns the literal character sequence of this line ending.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LineEnding::LF => "\n",
            LineEnding::CRLF => "\r\n",
            LineEnding::CR => "\r",
        }
    }

    /// Returns the short label for this line ending (`LF`, `CRLF` or `CR`).
    pub fn display_name(&self) -> &'static str {
        match *self {
            LineEnding::LF => "LF",
            LineEnding::CRLF => "CRLF",
            LineEnding::CR => "CR",
        }
    }
}
216
217struct WriteRecipe {
219 src_path: Option<PathBuf>,
221 insert_data: Vec<Vec<u8>>,
223 actions: Vec<RecipeAction>,
225}
226
227#[derive(Debug, Clone, Copy)]
229enum RecipeAction {
230 Copy { offset: u64, len: u64 },
232 Insert { index: usize },
234}
235
236impl WriteRecipe {
237 fn to_write_ops(&self) -> Vec<WriteOp<'_>> {
239 self.actions
240 .iter()
241 .map(|action| match action {
242 RecipeAction::Copy { offset, len } => WriteOp::Copy {
243 offset: *offset,
244 len: *len,
245 },
246 RecipeAction::Insert { index } => WriteOp::Insert {
247 data: &self.insert_data[*index],
248 },
249 })
250 .collect()
251 }
252
253 fn has_copy_ops(&self) -> bool {
255 self.actions
256 .iter()
257 .any(|a| matches!(a, RecipeAction::Copy { .. }))
258 }
259
260 fn flatten_inserts(&self) -> Vec<u8> {
263 let mut result = Vec::new();
264 for action in &self.actions {
265 if let RecipeAction::Insert { index } = action {
266 result.extend_from_slice(&self.insert_data[*index]);
267 }
268 }
269 result
270 }
271}
272
/// A zero-based line number that is either exact or an approximation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// Exact line number.
    Absolute(usize),
    /// Line number derived relative to another line.
    Relative {
        /// The (approximate) line number.
        line: usize,
        /// Line the value was derived from.
        from_cached_line: usize,
    },
}

impl LineNumber {
    /// Returns the stored line number regardless of the variant.
    pub fn value(&self) -> usize {
        match *self {
            LineNumber::Absolute(line) => line,
            LineNumber::Relative { line, .. } => line,
        }
    }

    /// Returns `true` for [`LineNumber::Absolute`].
    pub fn is_absolute(&self) -> bool {
        match self {
            LineNumber::Absolute(_) => true,
            LineNumber::Relative { .. } => false,
        }
    }

    /// Returns `true` for [`LineNumber::Relative`].
    pub fn is_relative(&self) -> bool {
        match self {
            LineNumber::Relative { .. } => true,
            LineNumber::Absolute(_) => false,
        }
    }

    /// Formats the line number one-based for display; relative values are
    /// prefixed with `~`.
    pub fn format(&self) -> String {
        let one_based = self.value() + 1;
        if self.is_relative() {
            format!("~{}", one_based)
        } else {
            format!("{}", one_based)
        }
    }
}
312
/// Text buffer backed by a piece tree, with loading and saving routed through
/// an injected [`FileSystem`].
pub struct TextBuffer {
    /// Filesystem used for all file access of this buffer.
    fs: Arc<dyn FileSystem + Send + Sync>,

    /// Piece tree describing the current document content.
    piece_tree: PieceTree,

    /// Piece-tree root captured at load time or at the last saved snapshot.
    saved_root: Arc<crate::model::piece_tree::PieceTreeNode>,

    /// Backing buffers referenced by the pieces; looked up by buffer id.
    buffers: Vec<StringBuffer>,

    /// Id for the next backing buffer (presumably; allocation is outside this view).
    next_buffer_id: usize,

    /// File associated with the buffer, if any.
    file_path: Option<PathBuf>,

    /// True once content changed since the last saved snapshot.
    modified: bool,

    /// Set whenever content is modified.
    /// NOTE(review): where this flag is consumed/cleared is outside this view.
    recovery_pending: bool,

    /// True when the buffer was opened through the large-file path; selects
    /// how the buffer is consolidated after a save.
    large_file: bool,

    /// Whether line-feed counts are considered known for a large file; only
    /// then is the line-feed count carried over when consolidating after save.
    line_feeds_scanned: bool,

    /// True for binary content; disables encoding conversion on save.
    is_binary: bool,

    /// Line ending to use when writing the buffer.
    line_ending: LineEnding,

    /// Line ending as of load / last save; a difference from `line_ending`
    /// triggers line-ending conversion on save.
    original_line_ending: LineEnding,

    /// Encoding to use when writing the buffer.
    encoding: Encoding,

    /// Encoding as of load / last save.
    original_encoding: Encoding,

    /// Size of the file on disk as of load / last save; `None` for buffers
    /// that were created empty.
    saved_file_size: Option<usize>,

    /// Counter incremented (wrapping) on every version bump.
    version: u64,

    /// Buffer configuration.
    config: BufferConfig,
}
384
/// Copy of the content-related state of a [`TextBuffer`].
///
/// NOTE(review): creation and restoration of snapshots happen outside this
/// view; the fields mirror the same-named `TextBuffer` fields.
#[derive(Debug, Clone)]
pub struct BufferSnapshot {
    /// Piece tree at the time of the snapshot.
    pub piece_tree: PieceTree,
    /// Backing buffers at the time of the snapshot.
    pub buffers: Vec<StringBuffer>,
    /// Value of the buffer-id counter at the time of the snapshot.
    pub next_buffer_id: usize,
}
396
397impl TextBuffer {
398 pub fn new(_large_file_threshold: usize, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
401 let piece_tree = PieceTree::empty();
402 let line_ending = LineEnding::default();
403 let encoding = Encoding::default();
404 TextBuffer {
405 fs,
406 saved_root: piece_tree.root(),
407 piece_tree,
408 buffers: vec![StringBuffer::new(0, Vec::new())],
409 next_buffer_id: 1,
410 file_path: None,
411 modified: false,
412 recovery_pending: false,
413 large_file: false,
414 line_feeds_scanned: false,
415 is_binary: false,
416 line_ending,
417 original_line_ending: line_ending,
418 encoding,
419 original_encoding: encoding,
420 saved_file_size: None,
421 version: 0,
422 config: BufferConfig::default(),
423 }
424 }
425
426 pub fn new_with_path(
429 large_file_threshold: usize,
430 fs: Arc<dyn FileSystem + Send + Sync>,
431 path: PathBuf,
432 ) -> Self {
433 let mut buffer = Self::new(large_file_threshold, fs);
434 buffer.file_path = Some(path);
435 buffer
436 }
437
    /// Returns the current version counter of the buffer.
    pub fn version(&self) -> u64 {
        self.version
    }
442
    /// Returns the filesystem this buffer performs its I/O through.
    pub fn filesystem(&self) -> &Arc<dyn FileSystem + Send + Sync> {
        &self.fs
    }
447
    /// Replaces the filesystem used for subsequent I/O of this buffer.
    pub fn set_filesystem(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
        self.fs = fs;
    }
452
    /// Increments the version counter, wrapping around on overflow.
    #[inline]
    fn bump_version(&mut self) {
        self.version = self.version.wrapping_add(1);
    }
457
    /// Records a content change: sets the `modified` and `recovery_pending`
    /// flags and bumps the version counter.
    #[inline]
    fn mark_content_modified(&mut self) {
        self.modified = true;
        self.recovery_pending = true;
        self.bump_version();
    }
464
465 fn from_bytes_raw(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
468 let bytes = content.len();
469
470 let line_ending = Self::detect_line_ending(&content);
472
473 let buffer = StringBuffer::new(0, content);
475 let line_feed_cnt = buffer.line_feed_count();
476
477 let piece_tree = if bytes > 0 {
478 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
479 } else {
480 PieceTree::empty()
481 };
482
483 let saved_root = piece_tree.root();
484
485 TextBuffer {
486 fs,
487 line_ending,
488 original_line_ending: line_ending,
489 encoding: Encoding::Utf8, original_encoding: Encoding::Utf8,
491 piece_tree,
492 saved_root,
493 buffers: vec![buffer],
494 next_buffer_id: 1,
495 file_path: None,
496 modified: false,
497 recovery_pending: false,
498 large_file: false,
499 line_feeds_scanned: false,
500 is_binary: true,
501 saved_file_size: Some(bytes),
502 version: 0,
503 config: BufferConfig::default(),
504 }
505 }
506
507 pub fn from_bytes(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
509 let (encoding, utf8_content) = Self::detect_and_convert_encoding(&content);
511
512 let bytes = utf8_content.len();
513
514 let line_ending = Self::detect_line_ending(&utf8_content);
516
517 let buffer = StringBuffer::new(0, utf8_content);
519 let line_feed_cnt = buffer.line_feed_count();
520
521 let piece_tree = if bytes > 0 {
522 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
523 } else {
524 PieceTree::empty()
525 };
526
527 let saved_root = piece_tree.root();
528
529 TextBuffer {
530 fs,
531 line_ending,
532 original_line_ending: line_ending,
533 encoding,
534 original_encoding: encoding,
535 piece_tree,
536 saved_root,
537 buffers: vec![buffer],
538 next_buffer_id: 1,
539 file_path: None,
540 modified: false,
541 recovery_pending: false,
542 large_file: false,
543 line_feeds_scanned: false,
544 is_binary: false,
545 saved_file_size: Some(bytes), version: 0,
547 config: BufferConfig::default(),
548 }
549 }
550
551 pub fn from_bytes_with_encoding(
553 content: Vec<u8>,
554 encoding: Encoding,
555 fs: Arc<dyn FileSystem + Send + Sync>,
556 ) -> Self {
557 let utf8_content = encoding::convert_to_utf8(&content, encoding);
559
560 let bytes = utf8_content.len();
561
562 let line_ending = Self::detect_line_ending(&utf8_content);
564
565 let buffer = StringBuffer::new(0, utf8_content);
567 let line_feed_cnt = buffer.line_feed_count();
568
569 let piece_tree = if bytes > 0 {
570 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
571 } else {
572 PieceTree::empty()
573 };
574
575 let saved_root = piece_tree.root();
576
577 TextBuffer {
578 fs,
579 line_ending,
580 original_line_ending: line_ending,
581 encoding,
582 original_encoding: encoding,
583 piece_tree,
584 saved_root,
585 buffers: vec![buffer],
586 next_buffer_id: 1,
587 file_path: None,
588 modified: false,
589 recovery_pending: false,
590 large_file: false,
591 line_feeds_scanned: false,
592 is_binary: false,
593 saved_file_size: Some(bytes),
594 version: 0,
595 config: BufferConfig::default(),
596 }
597 }
598
599 pub fn from_str(
601 s: &str,
602 _large_file_threshold: usize,
603 fs: Arc<dyn FileSystem + Send + Sync>,
604 ) -> Self {
605 Self::from_bytes(s.as_bytes().to_vec(), fs)
606 }
607
608 pub fn empty(fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
610 let piece_tree = PieceTree::empty();
611 let saved_root = piece_tree.root();
612 let line_ending = LineEnding::default();
613 let encoding = Encoding::default();
614 TextBuffer {
615 fs,
616 piece_tree,
617 saved_root,
618 buffers: vec![StringBuffer::new(0, Vec::new())],
619 next_buffer_id: 1,
620 file_path: None,
621 modified: false,
622 recovery_pending: false,
623 large_file: false,
624 line_feeds_scanned: false,
625 is_binary: false,
626 line_ending,
627 original_line_ending: line_ending,
628 encoding,
629 original_encoding: encoding,
630 saved_file_size: None,
631 version: 0,
632 config: BufferConfig::default(),
633 }
634 }
635
636 pub fn load_from_file<P: AsRef<Path>>(
638 path: P,
639 large_file_threshold: usize,
640 fs: Arc<dyn FileSystem + Send + Sync>,
641 ) -> anyhow::Result<Self> {
642 let path = path.as_ref();
643
644 let metadata = fs.metadata(path)?;
646 let file_size = metadata.size as usize;
647
648 let threshold = if large_file_threshold > 0 {
650 large_file_threshold
651 } else {
652 DEFAULT_LARGE_FILE_THRESHOLD
653 };
654
655 if file_size >= threshold {
657 Self::load_large_file(path, file_size, fs)
658 } else {
659 Self::load_small_file(path, fs)
660 }
661 }
662
663 pub fn load_from_file_with_encoding<P: AsRef<Path>>(
665 path: P,
666 encoding: Encoding,
667 fs: Arc<dyn FileSystem + Send + Sync>,
668 config: BufferConfig,
669 ) -> anyhow::Result<Self> {
670 let path = path.as_ref();
671 let contents = fs.read_file(path)?;
672
673 let mut buffer = Self::from_bytes_with_encoding(contents, encoding, fs);
674 buffer.file_path = Some(path.to_path_buf());
675 buffer.modified = false;
676 buffer.config = config;
677 Ok(buffer)
678 }
679
680 fn load_small_file(path: &Path, fs: Arc<dyn FileSystem + Send + Sync>) -> anyhow::Result<Self> {
682 let contents = fs.read_file(path)?;
683
684 let (encoding, is_binary) = Self::detect_encoding_or_binary(&contents, false);
686
687 let mut buffer = if is_binary {
689 Self::from_bytes_raw(contents, fs)
690 } else {
691 Self::from_bytes(contents, fs)
693 };
694 buffer.file_path = Some(path.to_path_buf());
695 buffer.modified = false;
696 buffer.large_file = false;
697 buffer.is_binary = is_binary;
698 if is_binary {
700 buffer.encoding = encoding;
701 buffer.original_encoding = encoding;
702 }
703 Ok(buffer)
705 }
706
707 pub fn check_large_file_encoding(
716 path: impl AsRef<Path>,
717 fs: Arc<dyn FileSystem + Send + Sync>,
718 ) -> anyhow::Result<Option<LargeFileEncodingConfirmation>> {
719 let path = path.as_ref();
720 let metadata = fs.metadata(path)?;
721 let file_size = metadata.size as usize;
722
723 if file_size < DEFAULT_LARGE_FILE_THRESHOLD {
725 return Ok(None);
726 }
727
728 let sample_size = file_size.min(8 * 1024);
730 let sample = fs.read_range(path, 0, sample_size)?;
731 let (encoding, is_binary) =
732 Self::detect_encoding_or_binary(&sample, file_size > sample_size);
733
734 if is_binary {
736 return Ok(None);
737 }
738
739 if encoding.requires_full_file_load() {
741 return Ok(Some(LargeFileEncodingConfirmation {
742 path: path.to_path_buf(),
743 file_size,
744 encoding,
745 }));
746 }
747
748 Ok(None)
749 }
750
    /// Loads a large file without forcing a full load.
    ///
    /// Delegates to `load_large_file_internal` with `force_full_load = false`,
    /// so an encoding that requires a full load yields a
    /// `LargeFileEncodingConfirmation` error instead of loading the file.
    fn load_large_file(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> anyhow::Result<Self> {
        Self::load_large_file_internal(path, file_size, fs, false)
    }
762
763 pub fn load_large_file_confirmed(
768 path: impl AsRef<Path>,
769 fs: Arc<dyn FileSystem + Send + Sync>,
770 ) -> anyhow::Result<Self> {
771 let path = path.as_ref();
772 let metadata = fs.metadata(path)?;
773 let file_size = metadata.size as usize;
774 Self::load_large_file_internal(path, file_size, fs, true)
775 }
776
777 fn load_large_file_internal(
779 path: &Path,
780 file_size: usize,
781 fs: Arc<dyn FileSystem + Send + Sync>,
782 force_full_load: bool,
783 ) -> anyhow::Result<Self> {
784 use crate::model::piece_tree::{BufferData, BufferLocation};
785
786 let sample_size = file_size.min(8 * 1024);
789 let sample = fs.read_range(path, 0, sample_size)?;
790
791 let (encoding, is_binary) =
793 Self::detect_encoding_or_binary(&sample, file_size > sample_size);
794
795 if is_binary {
797 tracing::info!("Large binary file detected, loading without encoding conversion");
798 let contents = fs.read_file(path)?;
799 let mut buffer = Self::from_bytes_raw(contents, fs);
800 buffer.file_path = Some(path.to_path_buf());
801 buffer.modified = false;
802 buffer.large_file = true;
803 buffer.encoding = encoding;
804 buffer.original_encoding = encoding;
805 return Ok(buffer);
806 }
807
808 let requires_full_load = encoding.requires_full_file_load();
810
811 if requires_full_load && !force_full_load {
813 anyhow::bail!(LargeFileEncodingConfirmation {
814 path: path.to_path_buf(),
815 file_size,
816 encoding,
817 });
818 }
819
820 if !matches!(encoding, Encoding::Utf8 | Encoding::Ascii) {
823 tracing::info!(
824 "Large file with non-UTF-8 encoding ({:?}), loading fully for conversion",
825 encoding
826 );
827 let contents = fs.read_file(path)?;
828 let mut buffer = Self::from_bytes(contents, fs);
829 buffer.file_path = Some(path.to_path_buf());
830 buffer.modified = false;
831 buffer.large_file = true; buffer.is_binary = is_binary;
833 return Ok(buffer);
834 }
835
836 let line_ending = Self::detect_line_ending(&sample);
838
839 let buffer = StringBuffer {
841 id: 0,
842 data: BufferData::Unloaded {
843 file_path: path.to_path_buf(),
844 file_offset: 0,
845 bytes: file_size,
846 },
847 stored_file_offset: None,
848 };
849
850 let piece_tree = if file_size > 0 {
853 PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
854 } else {
855 PieceTree::empty()
856 };
857 let saved_root = piece_tree.root();
858
859 tracing::debug!(
860 "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
861 file_size,
862 file_size
863 );
864
865 Ok(TextBuffer {
866 fs,
867 piece_tree,
868 saved_root,
869 buffers: vec![buffer],
870 next_buffer_id: 1,
871 file_path: Some(path.to_path_buf()),
872 modified: false,
873 recovery_pending: false,
874 large_file: true,
875 line_feeds_scanned: false,
876 is_binary,
877 line_ending,
878 original_line_ending: line_ending,
879 encoding,
880 original_encoding: encoding,
881 saved_file_size: Some(file_size),
882 version: 0,
883 config: BufferConfig::default(),
884 })
885 }
886
887 pub fn save(&mut self) -> anyhow::Result<()> {
889 if let Some(path) = &self.file_path {
890 self.save_to_file(path.clone())
891 } else {
892 anyhow::bail!(io::Error::new(
893 io::ErrorKind::NotFound,
894 "No file path associated with buffer",
895 ))
896 }
897 }
898
    /// Returns `true` when the filesystem reports that the current user is
    /// not the owner of `dest_path`.
    ///
    /// NOTE(review): presumably in-place writing is chosen for such files so
    /// that the existing file keeps its ownership — confirm the rationale.
    fn should_use_inplace_write(&self, dest_path: &Path) -> bool {
        !self.fs.is_owner(dest_path)
    }
908
909 fn build_write_recipe(&self) -> io::Result<WriteRecipe> {
918 let total = self.total_bytes();
919
920 let needs_line_ending_conversion = self.line_ending != self.original_line_ending;
927 let needs_encoding_conversion = !self.is_binary
933 && (self.encoding != self.original_encoding
934 || !matches!(self.encoding, Encoding::Utf8 | Encoding::Ascii));
935 let needs_conversion = needs_line_ending_conversion || needs_encoding_conversion;
936
937 let src_path_for_copy: Option<&Path> = if needs_conversion {
938 None
939 } else {
940 self.file_path.as_deref().filter(|p| self.fs.exists(p))
941 };
942 let target_ending = self.line_ending;
943 let target_encoding = self.encoding;
944
945 let mut insert_data: Vec<Vec<u8>> = Vec::new();
946 let mut actions: Vec<RecipeAction> = Vec::new();
947
948 if let Some(bom) = target_encoding.bom_bytes() {
950 insert_data.push(bom.to_vec());
951 actions.push(RecipeAction::Insert { index: 0 });
952 }
953
954 for piece_view in self.piece_tree.iter_pieces_in_range(0, total) {
955 let buffer_id = piece_view.location.buffer_id();
956 let buffer = self.buffers.get(buffer_id).ok_or_else(|| {
957 io::Error::new(
958 io::ErrorKind::InvalidData,
959 format!("Buffer {} not found", buffer_id),
960 )
961 })?;
962
963 match &buffer.data {
964 BufferData::Unloaded {
966 file_path,
967 file_offset,
968 ..
969 } => {
970 let can_copy = matches!(piece_view.location, BufferLocation::Stored(_))
976 && src_path_for_copy.is_some_and(|src| file_path == src);
977
978 if can_copy {
979 let src_offset = (*file_offset + piece_view.buffer_offset) as u64;
980 actions.push(RecipeAction::Copy {
981 offset: src_offset,
982 len: piece_view.bytes as u64,
983 });
984 continue;
985 }
986
987 let data = self.fs.read_range(
990 file_path,
991 (*file_offset + piece_view.buffer_offset) as u64,
992 piece_view.bytes,
993 )?;
994
995 let data = if needs_line_ending_conversion {
996 Self::convert_line_endings_to(&data, target_ending)
997 } else {
998 data
999 };
1000
1001 let data = if needs_encoding_conversion {
1003 Self::convert_to_encoding(&data, target_encoding)
1004 } else {
1005 data
1006 };
1007
1008 let index = insert_data.len();
1009 insert_data.push(data);
1010 actions.push(RecipeAction::Insert { index });
1011 }
1012
1013 BufferData::Loaded { data, .. } => {
1015 let start = piece_view.buffer_offset;
1016 let end = start + piece_view.bytes;
1017 let chunk = &data[start..end];
1018
1019 let chunk = if needs_line_ending_conversion {
1020 Self::convert_line_endings_to(chunk, target_ending)
1021 } else {
1022 chunk.to_vec()
1023 };
1024
1025 let chunk = if needs_encoding_conversion {
1027 Self::convert_to_encoding(&chunk, target_encoding)
1028 } else {
1029 chunk
1030 };
1031
1032 let index = insert_data.len();
1033 insert_data.push(chunk);
1034 actions.push(RecipeAction::Insert { index });
1035 }
1036 }
1037 }
1038
1039 Ok(WriteRecipe {
1040 src_path: src_path_for_copy.map(|p| p.to_path_buf()),
1041 insert_data,
1042 actions,
1043 })
1044 }
1045
1046 fn create_temp_file(
1052 &self,
1053 dest_path: &Path,
1054 ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
1055 let same_dir_temp = self.fs.temp_path_for(dest_path);
1057 match self.fs.create_file(&same_dir_temp) {
1058 Ok(file) => Ok((same_dir_temp, file)),
1059 Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
1060 let temp_path = self.fs.unique_temp_path(dest_path);
1062 let file = self.fs.create_file(&temp_path)?;
1063 Ok((temp_path, file))
1064 }
1065 Err(e) => Err(e),
1066 }
1067 }
1068
1069 fn create_recovery_temp_file(
1072 &self,
1073 dest_path: &Path,
1074 ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
1075 let recovery_dir = crate::input::input_history::get_data_dir()
1077 .map(|d| d.join("recovery"))
1078 .unwrap_or_else(|_| std::env::temp_dir());
1079
1080 self.fs.create_dir_all(&recovery_dir)?;
1082
1083 let file_name = dest_path
1085 .file_name()
1086 .unwrap_or_else(|| std::ffi::OsStr::new("fresh-save"));
1087 let timestamp = std::time::SystemTime::now()
1088 .duration_since(std::time::UNIX_EPOCH)
1089 .map(|d| d.as_nanos())
1090 .unwrap_or(0);
1091 let pid = std::process::id();
1092
1093 let temp_name = format!(
1094 ".inplace-{}-{}-{}.tmp",
1095 file_name.to_string_lossy(),
1096 pid,
1097 timestamp
1098 );
1099 let temp_path = recovery_dir.join(temp_name);
1100
1101 let file = self.fs.create_file(&temp_path)?;
1102 Ok((temp_path, file))
1103 }
1104
1105 fn inplace_recovery_meta_path(&self, dest_path: &Path) -> PathBuf {
1108 let recovery_dir = crate::input::input_history::get_data_dir()
1109 .map(|d| d.join("recovery"))
1110 .unwrap_or_else(|_| std::env::temp_dir());
1111
1112 let hash = crate::services::recovery::path_hash(dest_path);
1113 recovery_dir.join(format!("{}.inplace.json", hash))
1114 }
1115
1116 fn write_inplace_recovery_meta(
1119 &self,
1120 meta_path: &Path,
1121 dest_path: &Path,
1122 temp_path: &Path,
1123 original_metadata: &Option<FileMetadata>,
1124 ) -> io::Result<()> {
1125 #[cfg(unix)]
1126 let (uid, gid, mode) = original_metadata
1127 .as_ref()
1128 .map(|m| {
1129 (
1130 m.uid.unwrap_or(0),
1131 m.gid.unwrap_or(0),
1132 m.permissions.as_ref().map(|p| p.mode()).unwrap_or(0o644),
1133 )
1134 })
1135 .unwrap_or((0, 0, 0o644));
1136 #[cfg(not(unix))]
1137 let (uid, gid, mode) = (0u32, 0u32, 0o644u32);
1138
1139 let recovery = crate::services::recovery::InplaceWriteRecovery::new(
1140 dest_path.to_path_buf(),
1141 temp_path.to_path_buf(),
1142 uid,
1143 gid,
1144 mode,
1145 );
1146
1147 let json = serde_json::to_string_pretty(&recovery)
1148 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1149
1150 self.fs.write_file(meta_path, json.as_bytes())
1151 }
1152
    /// Writes the buffer content to `path` and, on success, updates the
    /// buffer's saved state to match.
    ///
    /// Strategy selection:
    /// * an empty buffer writes an empty file;
    /// * on a local filesystem, a destination not owned by the current user
    ///   is written in place;
    /// * a recipe without copy operations is flattened and written in one
    ///   call; otherwise the filesystem patches the source into the
    ///   destination.
    ///
    /// # Errors
    /// On a local filesystem a permission failure stages the content in a
    /// temp file and returns a `SudoSaveRequired` error. All other I/O errors
    /// are propagated.
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Empty buffer: nothing to assemble, just write an empty file.
        if total == 0 {
            self.fs.write_file(dest_path, &[])?;
            self.finalize_save(dest_path)?;
            return Ok(());
        }

        let recipe = self.build_write_recipe()?;
        // `ops` borrows the insert chunks from `recipe`.
        let ops = recipe.to_write_ops();

        // A filesystem without remote connection info is treated as local.
        let is_local = self.fs.remote_connection_info().is_none();
        let use_inplace = is_local && self.should_use_inplace_write(dest_path);

        if use_inplace {
            self.save_with_inplace_write(dest_path, &recipe)?;
        } else if !recipe.has_copy_ops() && !is_local {
            // Remote, fully materialised content: send it in a single write.
            let data = recipe.flatten_inserts();
            self.fs.write_file(dest_path, &data)?;
        } else if is_local {
            let write_result = if !recipe.has_copy_ops() {
                let data = recipe.flatten_inserts();
                self.fs.write_file(dest_path, &data)
            } else {
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
                self.fs.write_patched(src_for_patch, dest_path, &ops)
            };

            if let Err(e) = write_result {
                if e.kind() == io::ErrorKind::PermissionDenied {
                    // Stage the full content in a temp file so the save can be
                    // completed with elevated privileges.
                    let original_metadata = self.fs.metadata_if_exists(dest_path);
                    let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
                    self.write_recipe_to_file(&mut temp_file, &recipe)?;
                    temp_file.sync_all()?;
                    drop(temp_file);
                    return Err(self.make_sudo_error(temp_path, dest_path, original_metadata));
                }
                return Err(e.into());
            }
        } else {
            // Remote with copy operations: let the filesystem patch the file.
            let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
            self.fs.write_patched(src_for_patch, dest_path, &ops)?;
        }

        self.finalize_save(dest_path)?;
        Ok(())
    }
1225
    /// Saves by overwriting the existing destination file instead of
    /// replacing it.
    ///
    /// Recipes without copy operations are written directly. Recipes with
    /// copy operations read from the file being overwritten, so the complete
    /// output is first staged in a recovery temp file and then streamed into
    /// the destination.
    ///
    /// # Errors
    /// Returns a `SudoSaveRequired` error (pointing at the staged temp file)
    /// when the destination cannot be opened for writing due to permissions;
    /// other I/O errors are propagated.
    fn save_with_inplace_write(
        &self,
        dest_path: &Path,
        recipe: &WriteRecipe,
    ) -> anyhow::Result<()> {
        let original_metadata = self.fs.metadata_if_exists(dest_path);

        if !recipe.has_copy_ops() {
            // Everything is in memory already; no staging needed.
            let data = recipe.flatten_inserts();
            return self.write_data_inplace(dest_path, &data, original_metadata);
        }

        let (temp_path, mut temp_file) = self.create_recovery_temp_file(dest_path)?;
        if let Err(e) = self.write_recipe_to_file(&mut temp_file, recipe) {
            // Best-effort cleanup of the incomplete staging file.
            #[allow(clippy::let_underscore_must_use)]
            let _ = self.fs.remove_file(&temp_path);
            return Err(e.into());
        }
        temp_file.sync_all()?;
        drop(temp_file);

        let recovery_meta_path = self.inplace_recovery_meta_path(dest_path);
        // Best effort: failing to write the recovery record does not abort the save.
        #[allow(clippy::let_underscore_must_use)]
        let _ = self.write_inplace_recovery_meta(
            &recovery_meta_path,
            dest_path,
            &temp_path,
            &original_metadata,
        );

        match self.fs.open_file_for_write(dest_path) {
            Ok(mut out_file) => {
                if let Err(e) = self.stream_file_to_writer(&temp_path, &mut out_file) {
                    // The staged temp file and recovery record are not removed
                    // on this path (presumably so the content can be
                    // recovered — confirm).
                    return Err(e.into());
                }
                out_file.sync_all()?;
                // Destination is complete; staging artifacts are no longer needed.
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&temp_path);
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Ok(())
            }
            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                // The recovery record is dropped; the temp file is kept and
                // handed to the caller through the sudo error.
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
            }
            Err(e) => {
                Err(e.into())
            }
        }
    }
1306
1307 fn write_data_inplace(
1309 &self,
1310 dest_path: &Path,
1311 data: &[u8],
1312 original_metadata: Option<FileMetadata>,
1313 ) -> anyhow::Result<()> {
1314 match self.fs.open_file_for_write(dest_path) {
1315 Ok(mut out_file) => {
1316 out_file.write_all(data)?;
1317 out_file.sync_all()?;
1318 Ok(())
1319 }
1320 Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
1321 let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
1323 temp_file.write_all(data)?;
1324 temp_file.sync_all()?;
1325 drop(temp_file);
1326 Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
1327 }
1328 Err(e) => Err(e.into()),
1329 }
1330 }
1331
1332 fn stream_file_to_writer(
1334 &self,
1335 src_path: &Path,
1336 out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1337 ) -> io::Result<()> {
1338 const CHUNK_SIZE: usize = 1024 * 1024; let file_size = self.fs.metadata(src_path)?.size;
1341 let mut offset = 0u64;
1342
1343 while offset < file_size {
1344 let remaining = file_size - offset;
1345 let chunk_len = std::cmp::min(remaining, CHUNK_SIZE as u64) as usize;
1346 let chunk = self.fs.read_range(src_path, offset, chunk_len)?;
1347 out_file.write_all(&chunk)?;
1348 offset += chunk_len as u64;
1349 }
1350
1351 Ok(())
1352 }
1353
1354 fn write_recipe_to_file(
1356 &self,
1357 out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1358 recipe: &WriteRecipe,
1359 ) -> io::Result<()> {
1360 for action in &recipe.actions {
1361 match action {
1362 RecipeAction::Copy { offset, len } => {
1363 let src_path = recipe.src_path.as_ref().ok_or_else(|| {
1365 io::Error::new(io::ErrorKind::InvalidData, "Copy action without source")
1366 })?;
1367 let data = self.fs.read_range(src_path, *offset, *len as usize)?;
1368 out_file.write_all(&data)?;
1369 }
1370 RecipeAction::Insert { index } => {
1371 out_file.write_all(&recipe.insert_data[*index])?;
1372 }
1373 }
1374 }
1375 Ok(())
1376 }
1377
1378 fn finalize_save(&mut self, dest_path: &Path) -> anyhow::Result<()> {
1380 let new_size = self.fs.metadata(dest_path)?.size as usize;
1381 tracing::debug!(
1382 "Buffer::save: updating saved_file_size from {:?} to {}",
1383 self.saved_file_size,
1384 new_size
1385 );
1386 self.saved_file_size = Some(new_size);
1387 self.file_path = Some(dest_path.to_path_buf());
1388
1389 self.consolidate_after_save(dest_path, new_size);
1392
1393 self.mark_saved_snapshot();
1394 self.original_line_ending = self.line_ending;
1395 self.original_encoding = self.encoding;
1396 Ok(())
1397 }
1398
1399 pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
1403 let new_size = self.fs.metadata(&dest_path)?.size as usize;
1404 self.saved_file_size = Some(new_size);
1405 self.file_path = Some(dest_path.clone());
1406
1407 self.consolidate_after_save(&dest_path, new_size);
1409
1410 self.mark_saved_snapshot();
1411 self.original_line_ending = self.line_ending;
1412 self.original_encoding = self.encoding;
1413 Ok(())
1414 }
1415
    /// Collapses the buffer's pieces after a save, using the large-file or
    /// small-file strategy depending on the `large_file` flag.
    ///
    /// `path` and `file_size` describe the file that was just written and are
    /// only needed by the large-file strategy.
    fn consolidate_after_save(&mut self, path: &Path, file_size: usize) {
        if self.large_file {
            self.consolidate_large_file(path, file_size);
        } else {
            self.consolidate_small_file();
        }
    }
1426
1427 fn consolidate_large_file(&mut self, path: &Path, file_size: usize) {
1431 let preserved_lf = if self.line_feeds_scanned {
1433 self.piece_tree.line_count().map(|c| c.saturating_sub(1))
1434 } else {
1435 None
1436 };
1437
1438 let buffer = StringBuffer {
1439 id: 0,
1440 data: BufferData::Unloaded {
1441 file_path: path.to_path_buf(),
1442 file_offset: 0,
1443 bytes: file_size,
1444 },
1445 stored_file_offset: None,
1446 };
1447
1448 self.piece_tree = if file_size > 0 {
1449 PieceTree::new(BufferLocation::Stored(0), 0, file_size, preserved_lf)
1450 } else {
1451 PieceTree::empty()
1452 };
1453
1454 self.buffers = vec![buffer];
1455 self.next_buffer_id = 1;
1456
1457 tracing::debug!(
1458 "Buffer::consolidate_large_file: consolidated into single piece of {} bytes",
1459 file_size
1460 );
1461 }
1462
1463 fn consolidate_small_file(&mut self) {
1465 if let Some(bytes) = self.get_all_text() {
1466 let line_feed_cnt = bytes.iter().filter(|&&b| b == b'\n').count();
1467 let len = bytes.len();
1468
1469 let buffer = StringBuffer::new_loaded(0, bytes, true);
1471
1472 self.piece_tree = if len > 0 {
1473 PieceTree::new(BufferLocation::Stored(0), 0, len, Some(line_feed_cnt))
1474 } else {
1475 PieceTree::empty()
1476 };
1477
1478 self.buffers = vec![buffer];
1479 self.next_buffer_id = 1;
1480
1481 tracing::debug!(
1482 "Buffer::consolidate_small_file: consolidated into single loaded buffer of {} bytes",
1483 len
1484 );
1485 }
1486 }
1487
1488 fn make_sudo_error(
1490 &self,
1491 temp_path: PathBuf,
1492 dest_path: &Path,
1493 original_metadata: Option<FileMetadata>,
1494 ) -> anyhow::Error {
1495 #[cfg(unix)]
1496 let (uid, gid, mode) = if let Some(ref meta) = original_metadata {
1497 (
1498 meta.uid.unwrap_or(0),
1499 meta.gid.unwrap_or(0),
1500 meta.permissions
1501 .as_ref()
1502 .map(|p| p.mode() & 0o7777)
1503 .unwrap_or(0),
1504 )
1505 } else {
1506 (0, 0, 0)
1507 };
1508 #[cfg(not(unix))]
1509 let (uid, gid, mode) = (0u32, 0u32, 0u32);
1510
1511 let _ = original_metadata; anyhow::anyhow!(SudoSaveRequired {
1514 temp_path,
1515 dest_path: dest_path.to_path_buf(),
1516 uid,
1517 gid,
1518 mode,
1519 })
1520 }
1521
/// Returns the total byte length reported by the piece tree.
pub fn total_bytes(&self) -> usize {
    self.piece_tree.total_bytes()
}
1526
/// Returns the piece tree's line count; a `None` from the tree is passed through unchanged.
pub fn line_count(&self) -> Option<usize> {
    self.piece_tree.line_count()
}
1533
/// Records the current tree root as the saved baseline and clears the modified flag.
pub fn mark_saved_snapshot(&mut self) {
    self.saved_root = self.piece_tree.root();
    self.modified = false;
}
1539
1540 pub fn refresh_saved_root_if_unmodified(&mut self) {
1545 if !self.modified {
1546 self.saved_root = self.piece_tree.root();
1547 }
1548 }
1549
1550 fn apply_chunk_load_to_saved_root(
1558 &mut self,
1559 old_buffer_id: usize,
1560 chunk_offset_in_buffer: usize,
1561 chunk_bytes: usize,
1562 new_buffer_id: usize,
1563 ) {
1564 use crate::model::piece_tree::{LeafData, PieceTree};
1565
1566 let mut leaves = Vec::new();
1567 self.saved_root.collect_leaves(&mut leaves);
1568
1569 let mut modified = false;
1570 let mut new_leaves: Vec<LeafData> = Vec::with_capacity(leaves.len() + 2);
1571
1572 for leaf in &leaves {
1573 if leaf.location.buffer_id() != old_buffer_id {
1574 new_leaves.push(*leaf);
1575 continue;
1576 }
1577
1578 let leaf_start = leaf.offset;
1579 let leaf_end = leaf.offset + leaf.bytes;
1580 let chunk_start = chunk_offset_in_buffer;
1581 let chunk_end = chunk_offset_in_buffer + chunk_bytes;
1582
1583 if chunk_start >= leaf_end || chunk_end <= leaf_start {
1585 new_leaves.push(*leaf);
1587 continue;
1588 }
1589
1590 modified = true;
1591
1592 if chunk_start > leaf_start {
1594 new_leaves.push(LeafData::new(
1595 leaf.location,
1596 leaf.offset,
1597 chunk_start - leaf_start,
1598 None, ));
1600 }
1601
1602 let actual_start = chunk_start.max(leaf_start);
1604 let actual_end = chunk_end.min(leaf_end);
1605 let offset_in_chunk = actual_start - chunk_start;
1606 new_leaves.push(LeafData::new(
1607 BufferLocation::Added(new_buffer_id),
1608 offset_in_chunk,
1609 actual_end - actual_start,
1610 None,
1611 ));
1612
1613 if chunk_end < leaf_end {
1615 new_leaves.push(LeafData::new(
1616 leaf.location,
1617 chunk_end,
1618 leaf_end - chunk_end,
1619 None,
1620 ));
1621 }
1622 }
1623
1624 if modified {
1625 self.saved_root = PieceTree::from_leaves(&new_leaves).root();
1626 }
1627 }
1628
1629 pub fn diff_since_saved(&self) -> PieceTreeDiff {
1641 let _span = tracing::info_span!(
1642 "diff_since_saved",
1643 large_file = self.large_file,
1644 modified = self.modified,
1645 lf_scanned = self.line_feeds_scanned
1646 )
1647 .entered();
1648
1649 if !self.modified {
1655 tracing::trace!("diff_since_saved: not modified → equal");
1656 return PieceTreeDiff {
1657 equal: true,
1658 byte_ranges: Vec::new(),
1659 nodes_visited: 0,
1660 };
1661 }
1662
1663 if Arc::ptr_eq(&self.saved_root, &self.piece_tree.root()) {
1666 tracing::trace!("diff_since_saved: Arc::ptr_eq fast path → equal");
1667 return PieceTreeDiff {
1668 equal: true,
1669 byte_ranges: Vec::new(),
1670 nodes_visited: 0,
1671 };
1672 }
1673
1674 let structure_diff = self.diff_trees_by_structure();
1677
1678 if structure_diff.equal {
1680 tracing::trace!("diff_since_saved: structure equal");
1681 return structure_diff;
1682 }
1683
1684 let total_changed_bytes: usize = structure_diff
1688 .byte_ranges
1689 .iter()
1690 .map(|r| r.end.saturating_sub(r.start))
1691 .sum();
1692
1693 const MAX_VERIFY_BYTES: usize = 64 * 1024; if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
1698 if self.verify_content_differs_in_ranges(&structure_diff.byte_ranges) {
1700 tracing::trace!(
1701 "diff_since_saved: content differs, byte_ranges={}",
1702 structure_diff.byte_ranges.len(),
1703 );
1704 return structure_diff;
1706 } else {
1707 return PieceTreeDiff {
1709 equal: true,
1710 byte_ranges: Vec::new(),
1711 nodes_visited: structure_diff.nodes_visited,
1712 };
1713 }
1714 }
1715
1716 tracing::info!(
1717 "diff_since_saved: large change, byte_ranges={}, nodes_visited={}",
1718 structure_diff.byte_ranges.len(),
1719 structure_diff.nodes_visited
1720 );
1721 structure_diff
1723 }
1724
1725 fn verify_content_differs_in_ranges(&self, byte_ranges: &[std::ops::Range<usize>]) -> bool {
1728 let saved_bytes = self.tree_total_bytes(&self.saved_root);
1729 let current_bytes = self.piece_tree.total_bytes();
1730
1731 if saved_bytes != current_bytes {
1733 return true;
1734 }
1735
1736 for range in byte_ranges {
1738 if range.start >= range.end {
1739 continue;
1740 }
1741
1742 let saved_slice =
1744 self.extract_range_from_tree(&self.saved_root, range.start, range.end);
1745 let current_slice = self.get_text_range(range.start, range.end);
1747
1748 match (saved_slice, current_slice) {
1749 (Some(saved), Some(current)) => {
1750 if saved != current {
1751 return true; }
1753 }
1754 _ => {
1755 return true;
1757 }
1758 }
1759 }
1760
1761 false
1763 }
1764
1765 fn extract_range_from_tree(
1767 &self,
1768 root: &Arc<crate::model::piece_tree::PieceTreeNode>,
1769 start: usize,
1770 end: usize,
1771 ) -> Option<Vec<u8>> {
1772 let mut result = Vec::with_capacity(end.saturating_sub(start));
1773 self.collect_range_from_node(root, start, end, 0, &mut result)?;
1774 Some(result)
1775 }
1776
1777 fn collect_range_from_node(
1779 &self,
1780 node: &Arc<crate::model::piece_tree::PieceTreeNode>,
1781 range_start: usize,
1782 range_end: usize,
1783 node_offset: usize,
1784 result: &mut Vec<u8>,
1785 ) -> Option<()> {
1786 use crate::model::piece_tree::PieceTreeNode;
1787
1788 match node.as_ref() {
1789 PieceTreeNode::Internal {
1790 left_bytes,
1791 left,
1792 right,
1793 ..
1794 } => {
1795 let left_end = node_offset + left_bytes;
1796
1797 if range_start < left_end {
1799 self.collect_range_from_node(
1800 left,
1801 range_start,
1802 range_end,
1803 node_offset,
1804 result,
1805 )?;
1806 }
1807
1808 if range_end > left_end {
1810 self.collect_range_from_node(right, range_start, range_end, left_end, result)?;
1811 }
1812 }
1813 PieceTreeNode::Leaf {
1814 location,
1815 offset,
1816 bytes,
1817 ..
1818 } => {
1819 let node_end = node_offset + bytes;
1820
1821 if range_start < node_end && range_end > node_offset {
1823 let buf = self.buffers.get(location.buffer_id())?;
1824 let data = buf.get_data()?;
1825
1826 let leaf_start = range_start.saturating_sub(node_offset);
1828 let leaf_end = (range_end - node_offset).min(*bytes);
1829
1830 if leaf_start < leaf_end {
1831 let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
1832 result.extend_from_slice(slice);
1833 }
1834 }
1835 }
1836 }
1837 Some(())
1838 }
1839
1840 fn tree_total_bytes(&self, root: &Arc<crate::model::piece_tree::PieceTreeNode>) -> usize {
1842 use crate::model::piece_tree::PieceTreeNode;
1843 match root.as_ref() {
1844 PieceTreeNode::Internal {
1845 left_bytes, right, ..
1846 } => left_bytes + self.tree_total_bytes(right),
1847 PieceTreeNode::Leaf { bytes, .. } => *bytes,
1848 }
1849 }
1850
/// Runs `diff_piece_trees` between the saved root and the current tree root.
fn diff_trees_by_structure(&self) -> PieceTreeDiff {
    crate::model::piece_tree_diff::diff_piece_trees(&self.saved_root, &self.piece_tree.root())
}
1855
1856 pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
1858 self.piece_tree
1859 .offset_to_position(offset, &self.buffers)
1860 .map(|(line, column)| Position { line, column })
1861 }
1862
1863 pub fn position_to_offset(&self, position: Position) -> usize {
1865 self.piece_tree
1866 .position_to_offset(position.line, position.column, &self.buffers)
1867 }
1868
1869 pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
1871 if text.is_empty() {
1872 return self.piece_tree.cursor_at_offset(offset);
1873 }
1874
1875 self.mark_content_modified();
1877
1878 let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
1880
1881 let (buffer_location, buffer_offset, text_len) =
1883 if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
1884 append_info
1885 } else {
1886 let buffer_id = self.next_buffer_id;
1888 self.next_buffer_id += 1;
1889 let buffer = StringBuffer::new(buffer_id, text.clone());
1890 self.buffers.push(buffer);
1891 (BufferLocation::Added(buffer_id), 0, text.len())
1892 };
1893
1894 if self.line_feeds_scanned {
1897 self.ensure_chunk_loaded_at(offset);
1898 }
1899
1900 self.piece_tree.insert(
1902 offset,
1903 buffer_location,
1904 buffer_offset,
1905 text_len,
1906 line_feed_cnt,
1907 &self.buffers,
1908 )
1909 }
1910
1911 fn try_append_to_existing_buffer(
1914 &mut self,
1915 offset: usize,
1916 text: &[u8],
1917 ) -> Option<(BufferLocation, usize, usize)> {
1918 if text.is_empty() || offset == 0 {
1920 return None;
1921 }
1922
1923 let piece_info = self.piece_tree.find_by_offset(offset - 1)?;
1926
1927 let offset_in_piece = piece_info.offset_in_piece?;
1931 if offset_in_piece + 1 != piece_info.bytes {
1932 return None; }
1934
1935 if !matches!(piece_info.location, BufferLocation::Added(_)) {
1937 return None;
1938 }
1939
1940 let buffer_id = piece_info.location.buffer_id();
1941 let buffer = self.buffers.get_mut(buffer_id)?;
1942
1943 let buffer_len = buffer.get_data()?.len();
1945
1946 if piece_info.offset + piece_info.bytes != buffer_len {
1948 return None;
1949 }
1950
1951 let append_offset = buffer.append(text);
1953
1954 Some((piece_info.location, append_offset, text.len()))
1955 }
1956
/// Inserts the UTF-8 bytes of `text` at byte `offset`; the cursor returned by
/// `insert_bytes` is discarded.
pub fn insert(&mut self, offset: usize, text: &str) {
    self.insert_bytes(offset, text.as_bytes().to_vec());
}
1961
1962 pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
1965 if text.is_empty() {
1966 let offset = self.position_to_offset(position);
1967 return self.piece_tree.cursor_at_offset(offset);
1968 }
1969
1970 self.mark_content_modified();
1971
1972 let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1974
1975 let buffer_id = self.next_buffer_id;
1977 self.next_buffer_id += 1;
1978 let buffer = StringBuffer::new(buffer_id, text.clone());
1979 self.buffers.push(buffer);
1980
1981 self.piece_tree.insert_at_position(
1983 position.line,
1984 position.column,
1985 BufferLocation::Added(buffer_id),
1986 0,
1987 text.len(),
1988 line_feed_cnt,
1989 &self.buffers,
1990 )
1991 }
1992
1993 pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
1995 if bytes == 0 || offset >= self.total_bytes() {
1996 return;
1997 }
1998
1999 if self.line_feeds_scanned {
2002 self.ensure_chunk_loaded_at(offset);
2003 let end = (offset + bytes).min(self.total_bytes());
2004 if end > offset {
2005 self.ensure_chunk_loaded_at(end.saturating_sub(1));
2006 }
2007 }
2008
2009 self.piece_tree.delete(offset, bytes, &self.buffers);
2011
2012 self.mark_content_modified();
2013 }
2014
2015 pub fn delete(&mut self, range: Range<usize>) {
2017 if range.end > range.start {
2018 self.delete_bytes(range.start, range.end - range.start);
2019 }
2020 }
2021
/// Deletes the text between two line/column positions through the piece tree,
/// then calls `mark_content_modified`.
pub fn delete_range(&mut self, start: Position, end: Position) {
    self.piece_tree.delete_position_range(
        start.line,
        start.column,
        end.line,
        end.column,
        &self.buffers,
    );
    self.mark_content_modified();
}
2035
2036 pub fn replace_content(&mut self, new_content: &str) {
2043 let bytes = new_content.len();
2044 let content_bytes = new_content.as_bytes().to_vec();
2045
2046 let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();
2048
2049 let buffer_id = self.next_buffer_id;
2051 self.next_buffer_id += 1;
2052 let buffer = StringBuffer::new(buffer_id, content_bytes);
2053 self.buffers.push(buffer);
2054
2055 if bytes > 0 {
2057 self.piece_tree = PieceTree::new(
2058 BufferLocation::Added(buffer_id),
2059 0,
2060 bytes,
2061 Some(line_feed_cnt),
2062 );
2063 } else {
2064 self.piece_tree = PieceTree::empty();
2065 }
2066
2067 self.mark_content_modified();
2068 }
2069
/// Restores the piece tree, buffer list and next buffer id from `snapshot`
/// (each cloned), then calls `mark_content_modified`.
pub fn restore_buffer_state(&mut self, snapshot: &BufferSnapshot) {
    self.piece_tree = snapshot.piece_tree.clone();
    self.buffers = snapshot.buffers.clone();
    self.next_buffer_id = snapshot.next_buffer_id;
    self.mark_content_modified();
}
2081
/// Captures clones of the piece tree and buffer list, plus the next buffer id,
/// in a shared [`BufferSnapshot`] that `restore_buffer_state` can consume.
pub fn snapshot_buffer_state(&self) -> Arc<BufferSnapshot> {
    Arc::new(BufferSnapshot {
        piece_tree: self.piece_tree.clone(),
        buffers: self.buffers.clone(),
        next_buffer_id: self.next_buffer_id,
    })
}
2094
2095 pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
2098 let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();
2103
2104 for (_, _, text) in edits {
2105 if !text.is_empty() {
2106 let buffer_id = self.next_buffer_id;
2107 self.next_buffer_id += 1;
2108 let content = text.as_bytes().to_vec();
2109 let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
2110 let bytes = content.len();
2111 let buffer = StringBuffer::new(buffer_id, content);
2112 self.buffers.push(buffer);
2113 buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
2114 }
2115 }
2117
2118 let mut idx = 0;
2120 let delta = self
2121 .piece_tree
2122 .apply_bulk_edits(edits, &self.buffers, |_text| {
2123 let info = buffer_info[idx];
2124 idx += 1;
2125 info
2126 });
2127
2128 self.mark_content_modified();
2129 delta
2130 }
2131
2132 fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
2138 if bytes == 0 {
2139 return Some(Vec::new());
2140 }
2141
2142 let mut result = Vec::with_capacity(bytes);
2143 let end_offset = offset + bytes;
2144 let mut collected = 0;
2145
2146 for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
2148 let buffer_id = piece_view.location.buffer_id();
2149 if let Some(buffer) = self.buffers.get(buffer_id) {
2150 let piece_start_in_doc = piece_view.doc_offset;
2152 let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;
2153
2154 let read_start = offset.max(piece_start_in_doc);
2156 let read_end = end_offset.min(piece_end_in_doc);
2157
2158 if read_end > read_start {
2159 let offset_in_piece = read_start - piece_start_in_doc;
2160 let bytes_to_read = read_end - read_start;
2161
2162 let buffer_start = piece_view.buffer_offset + offset_in_piece;
2163 let buffer_end = buffer_start + bytes_to_read;
2164
2165 let data = buffer.get_data()?;
2167
2168 if buffer_end <= data.len() {
2169 result.extend_from_slice(&data[buffer_start..buffer_end]);
2170 collected += bytes_to_read;
2171
2172 if collected >= bytes {
2173 break;
2174 }
2175 }
2176 }
2177 }
2178 }
2179
2180 Some(result)
2181 }
2182
/// Reads `bytes` bytes starting at document `offset`, loading unloaded
/// buffers on demand.
///
/// The end of the read is clamped to the document length. Whenever
/// `chunk_split_and_load` reports that it restructured the piece tree, the
/// piece iteration is restarted from the current read position.
///
/// # Errors
///
/// Fails when a chunk cannot be loaded, a referenced buffer is missing or has
/// no data after loading, a computed buffer range is out of bounds, or a full
/// pass over the pieces makes no progress.
pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
    let _span = tracing::info_span!("get_text_range_mut", offset, bytes).entered();
    if bytes == 0 {
        return Ok(Vec::new());
    }

    let mut result = Vec::with_capacity(bytes);
    // Never read past the end of the document.
    let end_offset = (offset + bytes).min(self.len());
    let mut current_offset = offset;
    // Number of passes over the piece tree; only used for the log line below.
    let mut iteration_count = 0u32;

    while current_offset < end_offset {
        iteration_count += 1;
        let mut made_progress = false;
        let mut restarted_iteration = false;

        for piece_view in self
            .piece_tree
            .iter_pieces_in_range(current_offset, end_offset)
        {
            let buffer_id = piece_view.location.buffer_id();

            // A missing buffer is treated as "no load needed" here; it is
            // reported as an error further down when its data is requested.
            let needs_loading = self
                .buffers
                .get(buffer_id)
                .map(|b| !b.is_loaded())
                .unwrap_or(false);

            // `true` means the tree was restructured: this iterator's piece
            // views are stale, so restart from `current_offset`.
            if needs_loading && self.chunk_split_and_load(&piece_view, current_offset)? {
                restarted_iteration = true;
                break;
            }

            let piece_start_in_doc = piece_view.doc_offset;
            let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

            // Intersection of the remaining range with this piece.
            let read_start = current_offset.max(piece_start_in_doc);
            let read_end = end_offset.min(piece_end_in_doc);

            if read_end > read_start {
                let offset_in_piece = read_start - piece_start_in_doc;
                let bytes_to_read = read_end - read_start;

                let buffer_start = piece_view.buffer_offset + offset_in_piece;
                let buffer_end = buffer_start + bytes_to_read;

                let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
                let data = buffer
                    .get_data()
                    .context("Buffer data unavailable after load")?;

                anyhow::ensure!(
                    buffer_end <= data.len(),
                    "Buffer range out of bounds: requested {}..{}, buffer size {}",
                    buffer_start,
                    buffer_end,
                    data.len()
                );

                result.extend_from_slice(&data[buffer_start..buffer_end]);
                current_offset = read_end;
                made_progress = true;
            }
        }

        // Neither bytes read nor a restart requested: bail out instead of
        // looping forever.
        if !made_progress && !restarted_iteration {
            tracing::error!(
                "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
                current_offset,
                offset,
                end_offset,
                self.len()
            );
            tracing::error!(
                "Piece tree stats: {} total bytes",
                self.piece_tree.stats().total_bytes
            );
            anyhow::bail!(
                "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
                current_offset,
                offset,
                end_offset,
                self.len()
            );
        }
    }

    if iteration_count > 1 {
        tracing::info!(
            iteration_count,
            result_len = result.len(),
            "get_text_range_mut: completed with multiple iterations"
        );
    }

    Ok(result)
}
2295
2296 pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
2309 let _span = tracing::info_span!("prepare_viewport", start_offset, line_count).entered();
2310 let estimated_bytes = line_count.saturating_mul(200);
2313
2314 let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
2316 let bytes_to_load = estimated_bytes.min(remaining_bytes);
2317 tracing::trace!(
2318 bytes_to_load,
2319 total_bytes = self.total_bytes(),
2320 "prepare_viewport loading"
2321 );
2322
2323 self.get_text_range_mut(start_offset, bytes_to_load)?;
2326
2327 Ok(())
2328 }
2329
/// Loads the data behind an unloaded piece, splitting off a chunk when the
/// piece or its backing buffer is too large to load whole.
///
/// Returns `Ok(false)` when the backing buffer was loaded in place and the
/// piece tree is unchanged. Returns `Ok(true)` when the tree was split and a
/// new chunk buffer substituted, in which case previously obtained piece
/// views are stale.
///
/// # Errors
///
/// Fails when a buffer cannot be found, the chunk buffer cannot be created,
/// or loading from the filesystem fails.
fn chunk_split_and_load(
    &mut self,
    piece_view: &PieceView,
    current_offset: usize,
) -> Result<bool> {
    let buffer_id = piece_view.location.buffer_id();

    // Size of the unloaded backing buffer; 0 when the buffer is missing or
    // reports no unloaded size.
    let buffer_bytes = self
        .buffers
        .get(buffer_id)
        .and_then(|b| b.unloaded_bytes())
        .unwrap_or(0);
    // Split when the piece exceeds one chunk, or when the buffer is larger
    // than the piece (loading it whole would read more than this piece needs).
    let needs_chunk_split =
        piece_view.bytes > LOAD_CHUNK_SIZE || buffer_bytes > piece_view.bytes;

    tracing::info!(
        buffer_id,
        piece_bytes = piece_view.bytes,
        buffer_bytes,
        needs_chunk_split,
        piece_doc_offset = piece_view.doc_offset,
        current_offset,
        "chunk_split_and_load: loading unloaded piece"
    );

    if !needs_chunk_split {
        let _span = tracing::info_span!(
            "load_small_buffer",
            piece_bytes = piece_view.bytes,
            buffer_id,
        )
        .entered();
        // Load the whole buffer in place; the tree is untouched.
        self.buffers
            .get_mut(buffer_id)
            .context("Buffer not found")?
            .load(&*self.fs)
            .context("Failed to load buffer")?;
        return Ok(false);
    }

    let _span = tracing::info_span!(
        "chunk_split_and_load",
        piece_bytes = piece_view.bytes,
        buffer_id,
    )
    .entered();

    let piece_start_in_doc = piece_view.doc_offset;
    let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

    // Choose the chunk in buffer coordinates: the whole piece when it fits in
    // one chunk, otherwise an aligned window containing the read position,
    // clipped to the end of the piece.
    // NOTE(review): rounding down to CHUNK_ALIGNMENT can yield a start below
    // `piece_view.buffer_offset`; the `saturating_sub` further down then clamps
    // the in-piece offset to 0 while `chunk_start_in_buffer` keeps the lower
    // value — confirm this case cannot occur or is handled downstream.
    let (chunk_start_in_buffer, chunk_bytes) = if piece_view.bytes <= LOAD_CHUNK_SIZE {
        (piece_view.buffer_offset, piece_view.bytes)
    } else {
        let start =
            (piece_view.buffer_offset + offset_in_piece) / CHUNK_ALIGNMENT * CHUNK_ALIGNMENT;
        let bytes = LOAD_CHUNK_SIZE
            .min((piece_view.buffer_offset + piece_view.bytes).saturating_sub(start));
        (start, bytes)
    };

    // Translate the chunk bounds into document offsets.
    let chunk_start_offset_in_piece =
        chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
    let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
    let split_end_in_doc = split_start_in_doc + chunk_bytes;

    // Split only at bounds that fall strictly inside the piece.
    if chunk_start_offset_in_piece > 0 {
        self.piece_tree
            .split_at_offset(split_start_in_doc, &self.buffers);
    }
    if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
        self.piece_tree
            .split_at_offset(split_end_in_doc, &self.buffers);
    }

    // Create a buffer for just the chunk, under a fresh buffer id.
    let chunk_buffer = self
        .buffers
        .get(buffer_id)
        .context("Buffer not found")?
        .create_chunk_buffer(self.next_buffer_id, chunk_start_in_buffer, chunk_bytes)
        .context("Failed to create chunk buffer")?;

    self.next_buffer_id += 1;
    let new_buffer_id = chunk_buffer.id;
    self.buffers.push(chunk_buffer);

    // Re-point the chunk's range in the tree at the new buffer.
    self.piece_tree.replace_buffer_reference(
        buffer_id,
        piece_view.buffer_offset + chunk_start_offset_in_piece,
        chunk_bytes,
        BufferLocation::Added(new_buffer_id),
    );

    self.buffers
        .get_mut(new_buffer_id)
        .context("Chunk buffer not found")?
        .load(&*self.fs)
        .context("Failed to load chunk")?;

    // With line feeds scanned, recount every leaf that currently has no
    // line-feed count; leaves whose scan fails are left without one.
    if self.line_feeds_scanned {
        let leaves = self.piece_tree.get_leaves();
        let mut fixups: Vec<(usize, usize)> = Vec::new();
        for (idx, leaf) in leaves.iter().enumerate() {
            if leaf.line_feed_cnt.is_none() {
                if let Ok(count) = self.scan_leaf(leaf) {
                    fixups.push((idx, count));
                }
            }
        }
        if !fixups.is_empty() {
            self.piece_tree.update_leaf_line_feeds_path_copy(&fixups);
        }
    }

    // Keep the saved baseline in step with the restructuring: reuse the new
    // root while unmodified, otherwise apply the same chunk substitution to
    // the saved tree.
    if !self.modified {
        self.saved_root = self.piece_tree.root();
    } else {
        self.apply_chunk_load_to_saved_root(
            buffer_id,
            chunk_start_in_buffer,
            chunk_bytes,
            new_buffer_id,
        );
    }

    Ok(true)
}
2486
/// Reads the whole document through `get_text_range` (which never loads
/// data); `None` is passed through from that call.
pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
    self.get_text_range(0, self.total_bytes())
}
2493
2494 pub(crate) fn get_all_text_string(&self) -> Option<String> {
2498 self.get_all_text()
2499 .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
2500 }
2501
2502 pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
2507 self.get_text_range(range.start, range.end.saturating_sub(range.start))
2508 .unwrap_or_default()
2509 }
2510
/// Public alias for `get_all_text_string`.
pub fn to_string(&self) -> Option<String> {
    self.get_all_text_string()
}
2516
/// Returns the document length in bytes (same value as `total_bytes`).
pub fn len(&self) -> usize {
    self.total_bytes()
}
2521
/// Returns `true` when the document contains zero bytes.
pub fn is_empty(&self) -> bool {
    self.total_bytes() == 0
}
2526
/// Returns the associated file path, if one is set.
pub fn file_path(&self) -> Option<&Path> {
    self.file_path.as_deref()
}
2531
/// Sets the associated file path to `path`; only the stored path field is changed here.
pub fn rename_file_path(&mut self, path: PathBuf) {
    self.file_path = Some(path);
}
2536
/// Removes the associated file path.
pub fn clear_file_path(&mut self) {
    self.file_path = None;
}
2543
2544 pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
2548 let old_size = self.total_bytes();
2549 if new_size <= old_size {
2550 return;
2551 }
2552
2553 let additional_bytes = new_size - old_size;
2554
2555 let buffer_id = self.next_buffer_id;
2557 self.next_buffer_id += 1;
2558
2559 let new_buffer = StringBuffer::new_unloaded(
2560 buffer_id,
2561 source_path.to_path_buf(),
2562 old_size, additional_bytes, );
2565 self.buffers.push(new_buffer);
2566
2567 self.piece_tree.insert(
2569 old_size,
2570 BufferLocation::Stored(buffer_id),
2571 0,
2572 additional_bytes,
2573 None, &self.buffers,
2575 );
2576 }
2577
/// Returns the modified flag.
pub fn is_modified(&self) -> bool {
    self.modified
}
2582
/// Clears the modified flag; unlike `mark_saved_snapshot`, `saved_root` is left untouched.
pub fn clear_modified(&mut self) {
    self.modified = false;
}
2587
/// Sets the modified flag to `modified`.
pub fn set_modified(&mut self, modified: bool) {
    self.modified = modified;
}
2593
/// Returns the recovery-pending flag.
pub fn is_recovery_pending(&self) -> bool {
    self.recovery_pending
}
2598
/// Sets the recovery-pending flag to `pending`.
pub fn set_recovery_pending(&mut self, pending: bool) {
    self.recovery_pending = pending;
}
2603
2604 fn ensure_chunk_loaded_at(&mut self, offset: usize) {
2610 if let Some(piece_info) = self.piece_tree.find_by_offset(offset) {
2611 let buffer_id = piece_info.location.buffer_id();
2612 if let Some(buffer) = self.buffers.get_mut(buffer_id) {
2613 if !buffer.is_loaded() {
2614 let buf_bytes = buffer.unloaded_bytes().unwrap_or(0);
2615 tracing::info!(
2616 "ensure_chunk_loaded_at: loading buffer {} ({} bytes) for offset {}",
2617 buffer_id,
2618 buf_bytes,
2619 offset
2620 );
2621 if let Err(e) = buffer.load(&*self.fs) {
2622 tracing::warn!("Failed to load chunk at offset {offset}: {e}");
2623 }
2624 }
2625 }
2626 }
2627 }
2628
/// Returns the large-file flag.
pub fn is_large_file(&self) -> bool {
    self.large_file
}
2633
/// Returns whether line feeds have been scanned (set by `apply_scan_updates`).
pub fn has_line_feed_scan(&self) -> bool {
    self.line_feeds_scanned
}
2639
/// Returns the piece tree's leaves as an owned vector.
pub fn piece_tree_leaves(&self) -> Vec<crate::model::piece_tree::LeafData> {
    self.piece_tree.get_leaves()
}
2644
2645 pub fn prepare_line_scan(&mut self) -> (Vec<LineScanChunk>, usize) {
2654 self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
2656
2657 let leaves = self.piece_tree.get_leaves();
2658 let total_bytes: usize = leaves.iter().map(|l| l.bytes).sum();
2659 let mut chunks = Vec::new();
2660
2661 for (idx, leaf) in leaves.iter().enumerate() {
2662 chunks.push(LineScanChunk {
2663 leaf_index: idx,
2664 byte_len: leaf.bytes,
2665 already_known: leaf.line_feed_cnt.is_some(),
2666 });
2667 }
2668
2669 (chunks, total_bytes)
2670 }
2671
/// Creates the state for an incremental, chunk-by-chunk search.
///
/// Calls `prepare_line_scan` (which may split leaves) to obtain the chunk
/// list, and starts with no matches, an empty overlap tail, and the running
/// line number at 1.
pub fn search_scan_init(
    &mut self,
    regex: regex::bytes::Regex,
    max_matches: usize,
    query_len: usize,
) -> ChunkedSearchState {
    let (chunks, total_bytes) = self.prepare_line_scan();
    ChunkedSearchState {
        chunks,
        next_chunk: 0,
        next_doc_offset: 0,
        total_bytes,
        scanned_bytes: 0,
        regex,
        matches: Vec::new(),
        overlap_tail: Vec::new(),
        overlap_doc_offset: 0,
        max_matches,
        capped: false,
        query_len,
        // Line numbers reported in matches are 1-based.
        running_line: 1,
    }
}
2700
/// Searches the next chunk of `state` and appends any matches found.
///
/// The chunk is searched together with the tail kept from the previous chunk
/// so matches spanning a chunk boundary are found. Matches lying entirely
/// inside that tail are skipped.
///
/// Returns `Ok(true)` while `state.is_done()` is still false after this
/// chunk, `Ok(false)` otherwise (including when called on a finished state).
///
/// # Errors
///
/// A failure to read the chunk is converted into an `std::io::Error`.
pub fn search_scan_next_chunk(
    &mut self,
    state: &mut ChunkedSearchState,
) -> std::io::Result<bool> {
    if state.is_done() {
        return Ok(false);
    }

    let chunk_info = state.chunks[state.next_chunk].clone();
    let doc_offset = state.next_doc_offset;

    // Advance the cursor before reading, so a read error does not leave the
    // state pointing at the same chunk.
    state.next_chunk += 1;
    state.scanned_bytes += chunk_info.byte_len;
    state.next_doc_offset += chunk_info.byte_len;

    let chunk_bytes = self
        .get_text_range_mut(doc_offset, chunk_info.byte_len)
        .map_err(std::io::Error::other)?;

    // Search buffer = previous chunk's tail followed by this chunk.
    let overlap_len = state.overlap_tail.len();
    let mut search_buf = Vec::with_capacity(overlap_len + chunk_bytes.len());
    search_buf.extend_from_slice(&state.overlap_tail);
    search_buf.extend_from_slice(&chunk_bytes);

    // Document offset of `search_buf[0]`.
    let buf_doc_offset = if overlap_len > 0 {
        state.overlap_doc_offset
    } else {
        doc_offset
    };

    // `running_line` is the line at the start of this chunk; rewind it by the
    // newlines in the overlap to get the line at the start of `search_buf`.
    let newlines_in_overlap = search_buf[..overlap_len]
        .iter()
        .filter(|&&b| b == b'\n')
        .count();
    let mut line_at = state.running_line - newlines_in_overlap;
    // Position in `search_buf` up to which newlines were already counted.
    let mut counted_to = 0usize;

    for m in state.regex.find_iter(&search_buf) {
        // Entirely inside the overlap: skip it so it is not recorded here.
        if overlap_len > 0 && m.end() <= overlap_len {
            continue;
        }

        if state.matches.len() >= state.max_matches {
            state.capped = true;
            break;
        }

        // Count only the newlines between the previous match and this one.
        line_at += search_buf[counted_to..m.start()]
            .iter()
            .filter(|&&b| b == b'\n')
            .count();
        counted_to = m.start();

        // Bounds of the line containing the match, limited to `search_buf`.
        let line_start = search_buf[..m.start()]
            .iter()
            .rposition(|&b| b == b'\n')
            .map(|p| p + 1)
            .unwrap_or(0);
        let line_end = search_buf[m.start()..]
            .iter()
            .position(|&b| b == b'\n')
            .map(|p| m.start() + p)
            .unwrap_or(search_buf.len());

        let match_doc_offset = buf_doc_offset + m.start();
        let match_len = m.end() - m.start();
        // 1-based byte column relative to `line_start`.
        let column = m.start() - line_start + 1;
        let context = String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();

        state.matches.push(SearchMatch {
            byte_offset: match_doc_offset,
            length: match_len,
            line: line_at,
            column,
            context,
        });
    }

    let newlines_in_chunk = chunk_bytes.iter().filter(|&&b| b == b'\n').count();
    state.running_line += newlines_in_chunk;

    // Keep a tail of at least 256 bytes (or the query length, if larger),
    // capped at the chunk size, for the next call.
    let max_overlap = state.query_len.max(256).min(chunk_bytes.len());
    let tail_start = chunk_bytes.len().saturating_sub(max_overlap);
    state.overlap_tail = chunk_bytes[tail_start..].to_vec();
    state.overlap_doc_offset = doc_offset + tail_start;

    Ok(!state.is_done())
}
2817
2818 pub fn search_scan_all(
2823 &mut self,
2824 regex: regex::bytes::Regex,
2825 max_matches: usize,
2826 query_len: usize,
2827 ) -> std::io::Result<ChunkedSearchState> {
2828 let mut state = self.search_scan_init(regex, max_matches, query_len);
2829 while self.search_scan_next_chunk(&mut state)? {}
2830 Ok(state)
2831 }
2832
/// Builds a plan that partitions the document into regions to be searched
/// on disk (`SearchRegion::Unloaded`) or in memory (`SearchRegion::Loaded`).
///
/// Returns `None` when the buffer has no file path. Leaves are first split to
/// at most `LOAD_CHUNK_SIZE`. Adjacent unloaded leaves that are contiguous in
/// the file are merged into one region, as are consecutive loaded leaves.
pub fn search_hybrid_plan(&mut self) -> Option<HybridSearchPlan> {
    let file_path = self.file_path.clone()?;

    self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
    let leaves = self.piece_tree.get_leaves();

    let mut regions: Vec<SearchRegion> = Vec::new();
    // Document offset of the leaf currently being visited.
    let mut doc_offset = 0usize;

    for leaf in &leaves {
        let buf = self.buffers.get(leaf.location.buffer_id());
        // Only `Stored` leaves whose buffer is still unloaded are searched on disk.
        let is_unloaded_stored = matches!(
            (&leaf.location, buf),
            (
                BufferLocation::Stored(_),
                Some(StringBuffer {
                    data: BufferData::Unloaded { .. },
                    ..
                }),
            )
        );

        if is_unloaded_stored {
            // Absolute file offset of this leaf's first byte.
            let file_offset = match buf.unwrap().data {
                BufferData::Unloaded {
                    file_offset: fo, ..
                } => fo + leaf.offset,
                _ => unreachable!(),
            };

            // Extend the previous unloaded region when this leaf directly
            // follows it in the file.
            if let Some(SearchRegion::Unloaded {
                file_offset: prev_fo,
                bytes: prev_bytes,
                ..
            }) = regions.last_mut()
            {
                if *prev_fo + *prev_bytes == file_offset {
                    *prev_bytes += leaf.bytes;
                    doc_offset += leaf.bytes;
                    continue;
                }
            }
            regions.push(SearchRegion::Unloaded {
                file_offset,
                bytes: leaf.bytes,
                doc_offset,
            });
        } else {
            // Copy the leaf's bytes from the buffer if data is available;
            // otherwise read them through the loading path. A leaf that
            // cannot be read is left out of the plan.
            let data = match buf.and_then(|b| b.get_data()) {
                Some(full) => {
                    let end = (leaf.offset + leaf.bytes).min(full.len());
                    full[leaf.offset..end].to_vec()
                }
                None => match self.get_text_range_mut(doc_offset, leaf.bytes) {
                    Ok(d) => d,
                    Err(_) => {
                        doc_offset += leaf.bytes;
                        continue;
                    }
                },
            };

            // Append to the previous loaded region when there is one.
            if let Some(SearchRegion::Loaded {
                data: prev_data, ..
            }) = regions.last_mut()
            {
                prev_data.extend_from_slice(&data);
                doc_offset += leaf.bytes;
                continue;
            }
            regions.push(SearchRegion::Loaded { data, doc_offset });
        }

        doc_offset += leaf.bytes;
    }

    Some(HybridSearchPlan { file_path, regions })
}
2921
2922 pub fn search_hybrid(
2932 &mut self,
2933 pattern: &str,
2934 opts: &FileSearchOptions,
2935 regex: Regex,
2936 max_matches: usize,
2937 query_len: usize,
2938 ) -> io::Result<Vec<SearchMatch>> {
2939 let plan = match self.search_hybrid_plan() {
2940 Some(p) => p,
2941 None => {
2942 let state = self.search_scan_all(regex, max_matches, query_len)?;
2943 return Ok(state.matches);
2944 }
2945 };
2946 plan.execute(&*self.fs, pattern, opts, ®ex, max_matches, query_len)
2947 }
2948
2949 pub fn scan_leaf(&self, leaf: &crate::model::piece_tree::LeafData) -> std::io::Result<usize> {
2954 let buffer_id = leaf.location.buffer_id();
2955 let buffer = self
2956 .buffers
2957 .get(buffer_id)
2958 .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::NotFound, "buffer not found"))?;
2959
2960 let count = match &buffer.data {
2961 crate::model::piece_tree::BufferData::Loaded { data, .. } => {
2962 let end = (leaf.offset + leaf.bytes).min(data.len());
2963 data[leaf.offset..end]
2964 .iter()
2965 .filter(|&&b| b == b'\n')
2966 .count()
2967 }
2968 crate::model::piece_tree::BufferData::Unloaded {
2969 file_path,
2970 file_offset,
2971 ..
2972 } => {
2973 let read_offset = *file_offset as u64 + leaf.offset as u64;
2974 self.fs
2975 .count_line_feeds_in_range(file_path, read_offset, leaf.bytes)?
2976 }
2977 };
2978 Ok(count)
2979 }
2980
2981 pub fn leaf_io_params(
2986 &self,
2987 leaf: &crate::model::piece_tree::LeafData,
2988 ) -> Option<(std::path::PathBuf, u64, usize)> {
2989 let buffer_id = leaf.location.buffer_id();
2990 let buffer = self.buffers.get(buffer_id)?;
2991 match &buffer.data {
2992 crate::model::piece_tree::BufferData::Loaded { .. } => None,
2993 crate::model::piece_tree::BufferData::Unloaded {
2994 file_path,
2995 file_offset,
2996 ..
2997 } => {
2998 let read_offset = *file_offset as u64 + leaf.offset as u64;
2999 Some((file_path.clone(), read_offset, leaf.bytes))
3000 }
3001 }
3002 }
3003
    /// Returns the buffers backing this text buffer as a slice.
    pub fn buffer_slice(&self) -> &[StringBuffer] {
        &self.buffers
    }
3008
    /// Forwards `updates` to the piece tree's `update_leaf_line_feeds` and
    /// then marks this buffer's line feeds as scanned.
    ///
    /// NOTE(review): each pair is presumably `(leaf index, line-feed count)`;
    /// confirm against `PieceTree::update_leaf_line_feeds`.
    pub fn apply_scan_updates(&mut self, updates: &[(usize, usize)]) {
        self.piece_tree.update_leaf_line_feeds(updates);
        self.line_feeds_scanned = true;
    }
3014
    /// Rebuilds the piece tree on top of a fresh single-file ("pristine") tree
    /// and records that pristine tree's root as `saved_root`.
    ///
    /// When no saved file size is known this only applies `scan_updates` to
    /// the current tree. Otherwise the current pieces are walked to derive the
    /// deletions (ranges of the saved file no longer referenced) and the
    /// insertions (added pieces with no stored file offset), which are then
    /// replayed on the pristine tree. In every path `line_feeds_scanned` ends
    /// up `true`.
    pub fn rebuild_with_pristine_saved_root(&mut self, scan_updates: &[(usize, usize)]) {
        let file_size = match self.saved_file_size {
            Some(s) => s,
            None => {
                // Nothing to rebuild against: just apply the scan results.
                self.apply_scan_updates(scan_updates);
                return;
            }
        };

        let total = self.total_bytes();
        // (offset in saved file, length) ranges missing from the current document.
        let mut deletions: Vec<(usize, usize)> = Vec::new();
        // (stored bytes preceding the piece, location, buffer offset, bytes, line-feed count).
        let mut insertions: Vec<(usize, BufferLocation, usize, usize, Option<usize>)> = Vec::new();
        // Next saved-file offset not yet accounted for by a walked piece.
        let mut orig_cursor: usize = 0;
        // Running total of bytes that come from the saved file.
        let mut stored_bytes_in_doc: usize = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    // A gap between the cursor and this piece is a deleted range.
                    if piece.buffer_offset > orig_cursor {
                        deletions.push((orig_cursor, piece.buffer_offset - orig_cursor));
                    }
                    orig_cursor = piece.buffer_offset + piece.bytes;
                    stored_bytes_in_doc += piece.bytes;
                }
                BufferLocation::Added(id) => {
                    // Added buffers that carry a stored file offset are treated
                    // like file content; the rest are genuine insertions.
                    if let Some(file_off) = self.buffers.get(id).and_then(|b| b.stored_file_offset)
                    {
                        if file_off > orig_cursor {
                            deletions.push((orig_cursor, file_off - orig_cursor));
                        }
                        orig_cursor = file_off + piece.bytes;
                        stored_bytes_in_doc += piece.bytes;
                    } else {
                        insertions.push((
                            stored_bytes_in_doc,
                            piece.location,
                            piece.buffer_offset,
                            piece.bytes,
                            piece.line_feed_cnt,
                        ));
                    }
                }
            }
        }
        // Anything after the last referenced byte of the file was deleted too.
        if orig_cursor < file_size {
            deletions.push((orig_cursor, file_size - orig_cursor));
        }

        let mut pristine = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        pristine.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
        pristine.update_leaf_line_feeds(scan_updates);

        self.saved_root = pristine.root();

        // No edits relative to the saved file: the pristine tree is the document.
        if deletions.is_empty() && insertions.is_empty() {
            self.piece_tree = pristine;
            self.line_feeds_scanned = true;
            return;
        }

        let mut tree = pristine;

        // Apply deletions from the highest offset down so earlier offsets stay valid.
        deletions.sort_by(|a, b| b.0.cmp(&a.0));
        for &(offset, len) in &deletions {
            tree.delete(offset, len, &self.buffers);
        }

        // Each insertion shifts the following ones by the bytes already inserted.
        let mut insert_delta: usize = 0;
        for &(offset, location, buf_offset, bytes, lf_cnt) in &insertions {
            tree.insert(
                offset + insert_delta,
                location,
                buf_offset,
                bytes,
                lf_cnt,
                &self.buffers,
            );
            insert_delta += bytes;
        }

        // Re-scan leaves that have no line-feed count; leaves whose scan
        // fails are left without one.
        let leaves = tree.get_leaves();
        let mut fixups: Vec<(usize, usize)> = Vec::new();
        for (idx, leaf) in leaves.iter().enumerate() {
            if leaf.line_feed_cnt.is_none() {
                if let Ok(count) = self.scan_leaf(leaf) {
                    fixups.push((idx, count));
                }
            }
        }
        if !fixups.is_empty() {
            tree.update_leaf_line_feeds_path_copy(&fixups);
        }

        self.piece_tree = tree;
        self.line_feeds_scanned = true;
    }
3140
3141 pub fn resolve_line_byte_offset(&mut self, target_line: usize) -> Option<usize> {
3147 if target_line == 0 {
3148 return Some(0);
3149 }
3150
3151 let (doc_offset, buffer_id, piece_offset, piece_bytes, lines_before) =
3153 self.piece_tree.piece_info_for_line(target_line)?;
3154
3155 let lines_to_skip = target_line - lines_before;
3157
3158 let buffer = self.buffers.get(buffer_id)?;
3160 let piece_data: Vec<u8> = match &buffer.data {
3161 crate::model::piece_tree::BufferData::Loaded { data, .. } => {
3162 let end = (piece_offset + piece_bytes).min(data.len());
3163 data[piece_offset..end].to_vec()
3164 }
3165 crate::model::piece_tree::BufferData::Unloaded {
3166 file_path,
3167 file_offset,
3168 ..
3169 } => {
3170 let read_offset = *file_offset as u64 + piece_offset as u64;
3171 self.fs
3172 .read_range(file_path, read_offset, piece_bytes)
3173 .ok()?
3174 }
3175 };
3176
3177 let mut newlines_found = 0;
3179 for (i, &byte) in piece_data.iter().enumerate() {
3180 if byte == b'\n' {
3181 newlines_found += 1;
3182 if newlines_found == lines_to_skip {
3183 return Some(doc_offset + i + 1);
3185 }
3186 }
3187 }
3188
3189 Some(doc_offset + piece_bytes)
3192 }
3193
    /// Returns the recorded saved file size, if any.
    pub fn original_file_size(&self) -> Option<usize> {
        self.saved_file_size
    }
3202
3203 pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
3212 use crate::model::piece_tree::BufferLocation;
3213
3214 let mut chunks = Vec::new();
3215 let total = self.total_bytes();
3216
3217 let mut stored_bytes_before = 0;
3223
3224 for piece in self.piece_tree.iter_pieces_in_range(0, total) {
3225 match piece.location {
3226 BufferLocation::Stored(_) => {
3227 stored_bytes_before += piece.bytes;
3229 }
3230 BufferLocation::Added(buffer_id) => {
3231 if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
3232 if buffer.stored_file_offset.is_some() {
3249 stored_bytes_before += piece.bytes;
3250 continue;
3251 }
3252 if let Some(data) = buffer.get_data() {
3254 let start = piece.buffer_offset;
3256 let end = start + piece.bytes;
3257 if end <= data.len() {
3258 chunks.push((stored_bytes_before, data[start..end].to_vec()));
3262 }
3263 }
3264 }
3265 }
3266 }
3267 }
3268
3269 chunks
3270 }
3271
    /// Returns the buffer's `is_binary` flag.
    pub fn is_binary(&self) -> bool {
        self.is_binary
    }
3276
    /// Returns the buffer's current line ending.
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }
3281
    /// Sets the line ending and marks the content as modified.
    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.mark_content_modified();
    }
3290
    /// Sets both the current and the original line ending.
    ///
    /// Unlike `set_line_ending`, this does not call `mark_content_modified`.
    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.original_line_ending = line_ending;
    }
3299
    /// Returns the buffer's current encoding.
    pub fn encoding(&self) -> Encoding {
        self.encoding
    }
3304
    /// Sets the encoding and marks the content as modified.
    pub fn set_encoding(&mut self, encoding: Encoding) {
        self.encoding = encoding;
        self.mark_content_modified();
    }
3313
    /// Sets both the current and the original encoding.
    ///
    /// Unlike `set_encoding`, this does not call `mark_content_modified`.
    pub fn set_default_encoding(&mut self, encoding: Encoding) {
        self.encoding = encoding;
        self.original_encoding = encoding;
    }
3322
3323 pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
3328 let check_len = bytes.len().min(8 * 1024);
3330 let sample = &bytes[..check_len];
3331
3332 let mut crlf_count = 0;
3333 let mut lf_only_count = 0;
3334 let mut cr_only_count = 0;
3335
3336 let mut i = 0;
3337 while i < sample.len() {
3338 if sample[i] == b'\r' {
3339 if i + 1 < sample.len() && sample[i + 1] == b'\n' {
3341 crlf_count += 1;
3342 i += 2; continue;
3344 } else {
3345 cr_only_count += 1;
3347 }
3348 } else if sample[i] == b'\n' {
3349 lf_only_count += 1;
3351 }
3352 i += 1;
3353 }
3354
3355 if crlf_count > lf_only_count && crlf_count > cr_only_count {
3357 LineEnding::CRLF
3358 } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
3359 LineEnding::CR
3360 } else {
3361 LineEnding::LF
3363 }
3364 }
3365
    /// Detects the encoding of `bytes` by delegating to
    /// `encoding::detect_encoding`.
    pub fn detect_encoding(bytes: &[u8]) -> Encoding {
        encoding::detect_encoding(bytes)
    }
3373
    /// Delegates to `encoding::detect_encoding_or_binary`, passing `bytes` and
    /// `truncated` through unchanged.
    ///
    /// NOTE(review): the returned `bool` presumably flags binary content and
    /// `truncated` presumably means `bytes` is only a prefix of the file;
    /// confirm against the `encoding` module.
    pub fn detect_encoding_or_binary(bytes: &[u8], truncated: bool) -> (Encoding, bool) {
        encoding::detect_encoding_or_binary(bytes, truncated)
    }
3384
    /// Delegates to `encoding::detect_and_convert`, returning the detected
    /// encoding together with the converted bytes.
    pub fn detect_and_convert_encoding(bytes: &[u8]) -> (Encoding, Vec<u8>) {
        encoding::detect_and_convert(bytes)
    }
3392
    /// Converts `utf8_bytes` to `target_encoding` by delegating to
    /// `encoding::convert_from_utf8`.
    pub fn convert_to_encoding(utf8_bytes: &[u8], target_encoding: Encoding) -> Vec<u8> {
        encoding::convert_from_utf8(utf8_bytes, target_encoding)
    }
3401
3402 #[allow(dead_code)] pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
3409 let mut normalized = Vec::with_capacity(bytes.len());
3410 let mut i = 0;
3411
3412 while i < bytes.len() {
3413 if bytes[i] == b'\r' {
3414 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3416 normalized.push(b'\n');
3418 i += 2; continue;
3420 } else {
3421 normalized.push(b'\n');
3423 }
3424 } else {
3425 normalized.push(bytes[i]);
3427 }
3428 i += 1;
3429 }
3430
3431 normalized
3432 }
3433
3434 fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
3439 let mut normalized = Vec::with_capacity(bytes.len());
3441 let mut i = 0;
3442 while i < bytes.len() {
3443 if bytes[i] == b'\r' {
3444 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3446 normalized.push(b'\n');
3448 i += 2;
3449 continue;
3450 } else {
3451 normalized.push(b'\n');
3453 }
3454 } else {
3455 normalized.push(bytes[i]);
3456 }
3457 i += 1;
3458 }
3459
3460 if target_ending == LineEnding::LF {
3462 return normalized;
3463 }
3464
3465 let replacement = target_ending.as_str().as_bytes();
3467 let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
3468
3469 for byte in normalized {
3470 if byte == b'\n' {
3471 result.extend_from_slice(replacement);
3472 } else {
3473 result.push(byte);
3474 }
3475 }
3476
3477 result
3478 }
3479
3480 pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
3482 let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
3483
3484 let bytes = if let Some(end_offset) = end {
3485 end_offset.saturating_sub(start)
3486 } else {
3487 self.total_bytes().saturating_sub(start)
3488 };
3489
3490 self.get_text_range(start, bytes)
3491 }
3492
3493 pub fn line_start_offset(&self, line: usize) -> Option<usize> {
3495 let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
3496 Some(start)
3497 }
3498
    /// Looks up the piece at `offset` by delegating to the piece tree's
    /// `find_by_offset`.
    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
        self.piece_tree.find_by_offset(offset)
    }
3503
    /// Returns the piece tree's statistics.
    pub fn stats(&self) -> TreeStats {
        self.piece_tree.stats()
    }
3508
3509 pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
3513 if pattern.is_empty() {
3514 return None;
3515 }
3516
3517 let pattern_bytes = pattern.as_bytes();
3518 let buffer_len = self.len();
3519
3520 if start_pos < buffer_len {
3522 if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
3523 return Some(offset);
3524 }
3525 }
3526
3527 if start_pos > 0 {
3529 if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
3530 return Some(offset);
3531 }
3532 }
3533
3534 None
3535 }
3536
3537 pub fn find_next_in_range(
3541 &self,
3542 pattern: &str,
3543 start_pos: usize,
3544 range: Option<Range<usize>>,
3545 ) -> Option<usize> {
3546 if pattern.is_empty() {
3547 return None;
3548 }
3549
3550 if let Some(search_range) = range {
3551 let pattern_bytes = pattern.as_bytes();
3553 let search_start = start_pos.max(search_range.start);
3554 let search_end = search_range.end.min(self.len());
3555
3556 if search_start < search_end {
3557 self.find_pattern(search_start, search_end, pattern_bytes)
3558 } else {
3559 None
3560 }
3561 } else {
3562 self.find_next(pattern, start_pos)
3564 }
3565 }
3566
3567 fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
3569 if pattern.is_empty() || start >= end {
3570 return None;
3571 }
3572
3573 const CHUNK_SIZE: usize = 65536; let overlap = pattern.len().saturating_sub(1).max(1);
3575
3576 let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);
3578
3579 for chunk in chunks {
3580 if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
3582 let match_end = pos + pattern.len();
3583 if match_end > chunk.valid_start {
3586 let absolute_pos = chunk.absolute_pos + pos;
3587 if absolute_pos + pattern.len() <= end {
3589 return Some(absolute_pos);
3590 }
3591 }
3592 }
3593 }
3594
3595 None
3596 }
3597
3598 fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
3600 if needle.is_empty() || needle.len() > haystack.len() {
3601 return None;
3602 }
3603
3604 (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
3605 }
3606
3607 pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
3609 let buffer_len = self.len();
3610
3611 if start_pos < buffer_len {
3613 if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
3614 return Some(offset);
3615 }
3616 }
3617
3618 if start_pos > 0 {
3620 if let Some(offset) = self.find_regex(0, start_pos, regex) {
3621 return Some(offset);
3622 }
3623 }
3624
3625 None
3626 }
3627
3628 pub fn find_next_regex_in_range(
3630 &self,
3631 regex: &Regex,
3632 start_pos: usize,
3633 range: Option<Range<usize>>,
3634 ) -> Option<usize> {
3635 if let Some(search_range) = range {
3636 let search_start = start_pos.max(search_range.start);
3637 let search_end = search_range.end.min(self.len());
3638
3639 if search_start < search_end {
3640 self.find_regex(search_start, search_end, regex)
3641 } else {
3642 None
3643 }
3644 } else {
3645 self.find_next_regex(regex, start_pos)
3646 }
3647 }
3648
3649 fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
3651 if start >= end {
3652 return None;
3653 }
3654
3655 const CHUNK_SIZE: usize = 1048576; const OVERLAP: usize = 4096; let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);
3661
3662 for chunk in chunks {
3663 if let Some(mat) = regex.find(&chunk.buffer) {
3665 let match_end = mat.end();
3666 if match_end > chunk.valid_start {
3669 let absolute_pos = chunk.absolute_pos + mat.start();
3670 let match_len = mat.end() - mat.start();
3672 if absolute_pos + match_len <= end {
3673 return Some(absolute_pos);
3674 }
3675 }
3676 }
3677 }
3678
3679 None
3680 }
3681
3682 pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
3684 if range.start >= self.len() {
3685 return false;
3686 }
3687
3688 let end = range.end.min(self.len());
3689 if end > range.start {
3690 self.delete_bytes(range.start, end - range.start);
3691 }
3692
3693 if !replacement.is_empty() {
3694 self.insert(range.start, replacement);
3695 }
3696
3697 true
3698 }
3699
3700 pub fn replace_next(
3702 &mut self,
3703 pattern: &str,
3704 replacement: &str,
3705 start_pos: usize,
3706 range: Option<Range<usize>>,
3707 ) -> Option<usize> {
3708 if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
3709 self.replace_range(pos..pos + pattern.len(), replacement);
3710 Some(pos)
3711 } else {
3712 None
3713 }
3714 }
3715
3716 pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
3718 if pattern.is_empty() {
3719 return 0;
3720 }
3721
3722 let mut count = 0;
3723 let mut pos = 0;
3724
3725 while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
3729 self.replace_range(found_pos..found_pos + pattern.len(), replacement);
3730 count += 1;
3731
3732 pos = found_pos + replacement.len();
3734
3735 if pos >= self.len() {
3737 break;
3738 }
3739 }
3740
3741 count
3742 }
3743
3744 pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
3746 let mut count = 0;
3747 let mut pos = 0;
3748
3749 while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
3750 let text = self
3752 .get_text_range_mut(found_pos, self.len() - found_pos)
3753 .context("Failed to read text for regex match")?;
3754
3755 if let Some(mat) = regex.find(&text) {
3756 self.replace_range(found_pos..found_pos + mat.len(), replacement);
3757 count += 1;
3758 pos = found_pos + replacement.len();
3759
3760 if pos >= self.len() {
3761 break;
3762 }
3763 } else {
3764 break;
3765 }
3766 }
3767
3768 Ok(count)
3769 }
3770
3771 pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
3775 self.offset_to_position(byte_pos)
3776 .map(|pos| (pos.line, pos.column))
3777 .unwrap_or_else(|| (byte_pos / 80, 0)) }
3779
3780 pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
3784 if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
3785 let line_len = if let Some(end_offset) = end {
3787 end_offset.saturating_sub(start)
3788 } else {
3789 self.total_bytes().saturating_sub(start)
3790 };
3791 let byte_offset = character.min(line_len);
3792 start + byte_offset
3793 } else {
3794 self.len()
3796 }
3797 }
3798
3799 pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
3802 let (line, column_bytes) = self
3803 .offset_to_position(byte_pos)
3804 .map(|pos| (pos.line, pos.column))
3805 .unwrap_or_else(|| (byte_pos / 80, 0)); if let Some(line_bytes) = self.get_line(line) {
3809 let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
3811 let text_str = String::from_utf8_lossy(text_before);
3812 let utf16_offset = text_str.encode_utf16().count();
3813 (line, utf16_offset)
3814 } else {
3815 (line, 0)
3816 }
3817 }
3818
3819 pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
3823 if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
3824 let line_len = if let Some(end_offset) = end {
3826 end_offset.saturating_sub(line_start)
3827 } else {
3828 self.total_bytes().saturating_sub(line_start)
3829 };
3830
3831 if line_len > 0 {
3832 let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
3834 return line_start;
3835 };
3836 let line_str = String::from_utf8_lossy(&line_bytes);
3837
3838 let mut utf16_count = 0;
3840 let mut byte_offset = 0;
3841
3842 for ch in line_str.chars() {
3843 if utf16_count >= utf16_offset {
3844 break;
3845 }
3846 utf16_count += ch.len_utf16();
3847 byte_offset += ch.len_utf8();
3848 }
3849
3850 line_start + byte_offset
3851 } else {
3852 line_start
3853 }
3854 } else {
3855 self.len()
3857 }
3858 }
3859
3860 pub fn prev_char_boundary(&self, pos: usize) -> usize {
3864 if pos == 0 {
3865 return 0;
3866 }
3867
3868 let start = pos.saturating_sub(4);
3870 let Some(bytes) = self.get_text_range(start, pos - start) else {
3871 return pos;
3873 };
3874
3875 for i in (0..bytes.len()).rev() {
3877 let byte = bytes[i];
3878 if (byte & 0b1100_0000) != 0b1000_0000 {
3880 return start + i;
3881 }
3882 }
3883
3884 pos.saturating_sub(1)
3886 }
3887
3888 pub fn next_char_boundary(&self, pos: usize) -> usize {
3890 let len = self.len();
3891 if pos >= len {
3892 return len;
3893 }
3894
3895 let end = (pos + 5).min(len);
3897 let Some(bytes) = self.get_text_range(pos, end - pos) else {
3898 return pos;
3900 };
3901
3902 for (i, &byte) in bytes.iter().enumerate().skip(1) {
3904 if (byte & 0b1100_0000) != 0b1000_0000 {
3906 return pos + i;
3907 }
3908 }
3909
3910 end
3912 }
3913
3914 #[inline]
3918 fn is_utf8_continuation_byte(byte: u8) -> bool {
3919 (byte & 0b1100_0000) == 0b1000_0000
3920 }
3921
3922 pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
3926 let len = self.len();
3927 if pos == 0 || pos >= len {
3928 return pos.min(len);
3929 }
3930
3931 let Some(bytes) = self.get_text_range(pos, 1) else {
3933 return pos;
3935 };
3936
3937 if !Self::is_utf8_continuation_byte(bytes[0]) {
3939 return pos;
3941 }
3942
3943 self.prev_char_boundary(pos)
3945 }
3946
    /// Returns the grapheme boundary preceding `pos`.
    ///
    /// A window of text ending at `pos` is decoded and handed to
    /// `grapheme::prev_grapheme_boundary`. The window starts at 32 bytes and
    /// doubles whenever the boundary found is the very start of a window that
    /// does not begin at offset 0. Falls back to `prev_char_boundary(pos)`
    /// when the text cannot be read or has no valid UTF-8 prefix.
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        let mut lookback: usize = 32;
        loop {
            let raw_start = pos.saturating_sub(lookback);
            // Align the window start via `prev_char_boundary` unless it is already 0.
            let start = if raw_start == 0 {
                0
            } else {
                self.prev_char_boundary(raw_start + 1)
            };

            let Some(bytes) = self.get_text_range(start, pos - start) else {
                return self.prev_char_boundary(pos);
            };

            let text = match std::str::from_utf8(&bytes) {
                Ok(s) => s,
                Err(e) => {
                    // Keep only the valid UTF-8 prefix; give up if it is empty.
                    let valid_bytes = &bytes[..e.valid_up_to()];
                    match std::str::from_utf8(valid_bytes) {
                        Ok(s) if !s.is_empty() => s,
                        _ => return self.prev_char_boundary(pos),
                    }
                }
            };

            let rel_pos = pos - start;
            let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

            // A boundary strictly inside the window, or a window that already
            // starts at offset 0, gives the final answer.
            if new_rel_pos > 0 || start == 0 {
                return start + new_rel_pos;
            }

            // The window already covers everything before `pos`.
            if lookback >= pos {
                return 0;
            }
            lookback = lookback.saturating_mul(2);
        }
    }
4012
4013 pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
4023 let len = self.len();
4024 if pos >= len {
4025 return len;
4026 }
4027
4028 let mut lookahead: usize = 32;
4029 loop {
4030 let end = (pos + lookahead).min(len);
4031 let Some(bytes) = self.get_text_range(pos, end - pos) else {
4032 return self.next_char_boundary(pos);
4034 };
4035
4036 let text = match std::str::from_utf8(&bytes) {
4039 Ok(s) => s,
4040 Err(e) => {
4041 let valid_bytes = &bytes[..e.valid_up_to()];
4044 match std::str::from_utf8(valid_bytes) {
4045 Ok(s) if !s.is_empty() => s,
4046 _ => return self.next_char_boundary(pos),
4047 }
4048 }
4049 };
4050
4051 let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
4052
4053 if new_rel_pos == text.len() && end < len {
4057 if lookahead >= len - pos {
4058 return len;
4059 }
4060 lookahead = lookahead.saturating_mul(2);
4061 continue;
4062 }
4063
4064 return pos + new_rel_pos;
4065 }
4066 }
4067
4068 pub fn prev_word_boundary(&self, pos: usize) -> usize {
4070 if pos == 0 {
4071 return 0;
4072 }
4073
4074 let start = pos.saturating_sub(256).max(0);
4076 let Some(bytes) = self.get_text_range(start, pos - start) else {
4077 return pos;
4079 };
4080 let text = String::from_utf8_lossy(&bytes);
4081
4082 let mut found_word_char = false;
4083 let chars: Vec<char> = text.chars().collect();
4084
4085 for i in (0..chars.len()).rev() {
4086 let ch = chars[i];
4087 let is_word_char = ch.is_alphanumeric() || ch == '_';
4088
4089 if found_word_char && !is_word_char {
4090 let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
4093 return start + byte_offset;
4094 }
4095
4096 if is_word_char {
4097 found_word_char = true;
4098 }
4099 }
4100
4101 0
4102 }
4103
4104 pub fn next_word_boundary(&self, pos: usize) -> usize {
4106 let len = self.len();
4107 if pos >= len {
4108 return len;
4109 }
4110
4111 let end = (pos + 256).min(len);
4113 let Some(bytes) = self.get_text_range(pos, end - pos) else {
4114 return pos;
4116 };
4117 let text = String::from_utf8_lossy(&bytes);
4118
4119 let mut found_word_char = false;
4120 let mut byte_offset = 0;
4121
4122 for ch in text.chars() {
4123 let is_word_char = ch.is_alphanumeric() || ch == '_';
4124
4125 if found_word_char && !is_word_char {
4126 return pos + byte_offset;
4128 }
4129
4130 if is_word_char {
4131 found_word_char = true;
4132 }
4133
4134 byte_offset += ch.len_utf8();
4135 }
4136
4137 len
4138 }
4139
    /// Creates a `LineIterator` over this buffer by forwarding `byte_pos` and
    /// `estimated_line_length` to `LineIterator::new`.
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }
4151
    /// Creates a `TextBufferLineIterator` by forwarding `byte_pos` and
    /// `max_lines` to `TextBufferLineIterator::new`.
    ///
    /// # Errors
    /// Returns whatever error `TextBufferLineIterator::new` reports.
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
4172
4173 pub fn get_line_number(&self, byte_offset: usize) -> usize {
4186 self.offset_to_position(byte_offset)
4187 .map(|pos| pos.line)
4188 .unwrap_or_else(|| {
4189 byte_offset / self.config.estimated_line_length
4191 })
4192 }
4193
    /// Returns the configured estimated line length.
    pub fn estimated_line_length(&self) -> usize {
        self.config.estimated_line_length
    }
4198
    /// Returns the line number of `start_byte` via `get_line_number`.
    ///
    /// `_line_count` is unused.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        self.get_line_number(start_byte)
    }
4237
    /// Returns the start offset of `line_number` by delegating to
    /// `line_start_offset`.
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }
4242
    /// No-op: the body is empty and `_byte_offset` is ignored.
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
    }
4247
    /// No-op: the body is empty and both arguments are ignored.
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
    }
4252
    /// No-op: the body is empty and both arguments are ignored.
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
    }
4257
    /// No-op: the body is empty.
    pub fn clear_line_cache(&mut self) {
    }
4262
    /// Test-only constructor: builds a buffer from the bytes of `s`, backed by
    /// a `StdFileSystem`.
    #[cfg(test)]
    pub fn from_str_test(s: &str) -> Self {
        Self::from_bytes(
            s.as_bytes().to_vec(),
            std::sync::Arc::new(crate::model::filesystem::StdFileSystem),
        )
    }
4273
    /// Test-only constructor: builds an empty buffer backed by a
    /// `StdFileSystem`.
    #[cfg(test)]
    pub fn new_test() -> Self {
        Self::empty(std::sync::Arc::new(crate::model::filesystem::StdFileSystem))
    }
4279}
4280
/// Alias for [`TextBuffer`].
pub type Buffer = TextBuffer;

// Re-export of the line iterator defined in `crate::primitives::line_iterator`.
pub use crate::primitives::line_iterator::LineIterator;
4286
/// One chunk produced by [`OverlappingChunks`].
#[derive(Debug)]
pub struct ChunkInfo {
    /// Copy of the iterator's buffer for this chunk, including any bytes
    /// carried over from the previous chunk.
    pub buffer: Vec<u8>,

    /// Document offset of `buffer[0]`.
    pub absolute_pos: usize,

    /// 0 for the first chunk; otherwise the overlap size clamped to the
    /// buffer length. The search helpers in this file only accept matches
    /// that end after this index.
    pub valid_start: usize,
}
4304
/// Iterator that reads a document range in chunks, carrying the last
/// `overlap` bytes of each chunk over into the next one.
pub struct OverlappingChunks<'a> {
    /// Pieces covering the requested range.
    piece_iter: PieceRangeIter,
    /// Buffers the pieces refer to.
    buffers: &'a [StringBuffer],

    /// Bytes of the current chunk (overlap prefix plus newly read bytes).
    buffer: Vec<u8>,
    /// Document offset of `buffer[0]`.
    buffer_absolute_pos: usize,

    /// Document offset of the next byte to read.
    current_pos: usize,
    /// Exclusive end of the requested range.
    end_pos: usize,

    /// Number of new bytes to read per chunk.
    chunk_size: usize,
    /// Number of trailing bytes kept from the previous chunk.
    overlap: usize,

    /// `true` until the first chunk has been filled.
    first_chunk: bool,

    /// Copy of the part of the current piece that falls inside the range.
    current_piece_data: Option<Vec<u8>>,
    /// Read position inside `current_piece_data`.
    current_piece_offset: usize,
}
4355
4356impl<'a> OverlappingChunks<'a> {
4357 pub fn new(
4372 text_buffer: &'a TextBuffer,
4373 start: usize,
4374 end: usize,
4375 chunk_size: usize,
4376 overlap: usize,
4377 ) -> Self {
4378 let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);
4379
4380 Self {
4381 piece_iter,
4382 buffers: &text_buffer.buffers,
4383 buffer: Vec::with_capacity(chunk_size + overlap),
4384 buffer_absolute_pos: start,
4385 current_pos: start,
4386 end_pos: end,
4387 chunk_size,
4388 overlap,
4389 first_chunk: true,
4390 current_piece_data: None,
4391 current_piece_offset: 0,
4392 }
4393 }
4394
4395 fn read_byte(&mut self) -> Option<u8> {
4397 loop {
4398 if let Some(ref data) = self.current_piece_data {
4400 if self.current_piece_offset < data.len() {
4401 let byte = data[self.current_piece_offset];
4402 self.current_piece_offset += 1;
4403 self.current_pos += 1;
4404 return Some(byte);
4405 } else {
4406 self.current_piece_data = None;
4408 self.current_piece_offset = 0;
4409 }
4410 }
4411
4412 if let Some(piece_view) = self.piece_iter.next() {
4414 let buffer_id = piece_view.location.buffer_id();
4415 if let Some(buffer) = self.buffers.get(buffer_id) {
4416 let piece_start_in_doc = piece_view.doc_offset;
4418 let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;
4419
4420 let read_start = self.current_pos.max(piece_start_in_doc);
4422 let read_end = self.end_pos.min(piece_end_in_doc);
4423
4424 if read_end > read_start {
4425 let offset_in_piece = read_start - piece_start_in_doc;
4426 let bytes_to_read = read_end - read_start;
4427
4428 let buffer_start = piece_view.buffer_offset + offset_in_piece;
4429 let buffer_end = buffer_start + bytes_to_read;
4430
4431 if let Some(data) = buffer.get_data() {
4432 if buffer_end <= data.len() {
4433 self.current_piece_data =
4435 Some(data[buffer_start..buffer_end].to_vec());
4436 self.current_piece_offset = 0;
4437 continue;
4438 }
4439 }
4440 }
4441 }
4442 }
4443
4444 return None;
4446 }
4447 }
4448
4449 fn fill_next_chunk(&mut self) -> bool {
4451 if self.first_chunk {
4452 self.first_chunk = false;
4454 while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
4455 if let Some(byte) = self.read_byte() {
4456 self.buffer.push(byte);
4457 } else {
4458 break;
4459 }
4460 }
4461 !self.buffer.is_empty()
4462 } else {
4463 if self.current_pos >= self.end_pos {
4465 return false;
4466 }
4467
4468 if self.buffer.len() > self.overlap {
4470 let drain_amount = self.buffer.len() - self.overlap;
4471 self.buffer.drain(0..drain_amount);
4472 self.buffer_absolute_pos += drain_amount;
4473 }
4474
4475 let before_len = self.buffer.len();
4477 let target_len = self.overlap + self.chunk_size;
4478 while self.buffer.len() < target_len && self.current_pos < self.end_pos {
4479 if let Some(byte) = self.read_byte() {
4480 self.buffer.push(byte);
4481 } else {
4482 break;
4483 }
4484 }
4485
4486 self.buffer.len() > before_len
4488 }
4489 }
4490}
4491
4492impl<'a> Iterator for OverlappingChunks<'a> {
4493 type Item = ChunkInfo;
4494
4495 fn next(&mut self) -> Option<Self::Item> {
4496 let is_first = self.buffer_absolute_pos == self.current_pos;
4498
4499 if !self.fill_next_chunk() {
4500 return None;
4501 }
4502
4503 let valid_start = if is_first {
4506 0
4507 } else {
4508 self.overlap.min(self.buffer.len())
4509 };
4510
4511 Some(ChunkInfo {
4512 buffer: self.buffer.clone(),
4513 absolute_pos: self.buffer_absolute_pos,
4514 valid_start,
4515 })
4516 }
4517}
4518
/// One contiguous region of the document in a [`HybridSearchPlan`].
#[derive(Debug)]
pub(crate) enum SearchRegion {
    /// Region whose bytes live in the plan's file and are read on demand.
    Unloaded {
        /// Offset of the region inside the file.
        file_offset: usize,
        /// Length of the region in bytes.
        bytes: usize,
        /// Offset of the region inside the document.
        doc_offset: usize,
    },
    /// Region whose bytes are held in memory, together with its document
    /// offset.
    Loaded { data: Vec<u8>, doc_offset: usize },
}
4532
/// A search plan over a document made of in-memory and on-disk regions.
#[derive(Debug)]
pub struct HybridSearchPlan {
    /// File that the unloaded regions are read from.
    pub(crate) file_path: PathBuf,
    /// Regions to search; `execute` processes them in this order.
    pub(crate) regions: Vec<SearchRegion>,
}
4545
4546impl HybridSearchPlan {
4547 pub fn execute(
4550 &self,
4551 fs: &dyn FileSystem,
4552 pattern: &str,
4553 opts: &FileSearchOptions,
4554 regex: &Regex,
4555 max_matches: usize,
4556 query_len: usize,
4557 ) -> io::Result<Vec<SearchMatch>> {
4558 if self.regions.is_empty() {
4559 return Ok(vec![]);
4560 }
4561
4562 if self.regions.len() == 1 {
4564 if let SearchRegion::Unloaded { .. } = &self.regions[0] {
4565 let mut cursor = FileSearchCursor::new();
4566 let mut all_matches = Vec::new();
4567 while !cursor.done && all_matches.len() < max_matches {
4568 let batch = fs.search_file(&self.file_path, pattern, opts, &mut cursor)?;
4569 all_matches.extend(batch);
4570 }
4571 all_matches.truncate(max_matches);
4572 return Ok(all_matches);
4573 }
4574 }
4575
4576 let overlap_size = query_len.max(256);
4577 let mut all_matches: Vec<SearchMatch> = Vec::new();
4578 let mut running_line: usize = 1;
4579 let mut prev_tail: Vec<u8> = Vec::new();
4580
4581 for region in &self.regions {
4582 if all_matches.len() >= max_matches {
4583 break;
4584 }
4585 let remaining = max_matches - all_matches.len();
4586
4587 match region {
4588 SearchRegion::Unloaded {
4589 file_offset,
4590 bytes,
4591 doc_offset: region_doc_offset,
4592 } => {
4593 if !prev_tail.is_empty() {
4595 let overlap_read = (*bytes).min(overlap_size);
4596 if let Ok(head) =
4597 fs.read_range(&self.file_path, *file_offset as u64, overlap_read)
4598 {
4599 let boundary = search_boundary_overlap(
4600 &prev_tail,
4601 &head,
4602 *region_doc_offset - prev_tail.len(),
4603 running_line,
4604 regex,
4605 remaining,
4606 );
4607 all_matches.extend(boundary);
4608 }
4609 }
4610
4611 let mut opts_bounded = opts.clone();
4613 opts_bounded.max_matches = remaining.saturating_sub(all_matches.len());
4614 let mut cursor = FileSearchCursor::for_range(
4615 *file_offset,
4616 *file_offset + *bytes,
4617 running_line,
4618 );
4619 while !cursor.done && all_matches.len() < max_matches {
4620 let mut batch =
4621 fs.search_file(&self.file_path, pattern, &opts_bounded, &mut cursor)?;
4622 for m in &mut batch {
4624 m.byte_offset = *region_doc_offset + (m.byte_offset - *file_offset);
4625 }
4626 all_matches.extend(batch);
4627 }
4628 running_line = cursor.running_line;
4629
4630 if *bytes >= overlap_size {
4632 let tail_off = *file_offset + *bytes - overlap_size;
4633 prev_tail = fs
4634 .read_range(&self.file_path, tail_off as u64, overlap_size)
4635 .unwrap_or_default();
4636 } else {
4637 prev_tail = fs
4638 .read_range(&self.file_path, *file_offset as u64, *bytes)
4639 .unwrap_or_default();
4640 }
4641 }
4642 SearchRegion::Loaded {
4643 data,
4644 doc_offset: region_doc_offset,
4645 } => {
4646 let mut search_buf = Vec::with_capacity(prev_tail.len() + data.len());
4648 search_buf.extend_from_slice(&prev_tail);
4649 search_buf.extend_from_slice(data);
4650
4651 let overlap_len = prev_tail.len();
4652 let buf_doc_offset = if overlap_len > 0 {
4653 *region_doc_offset - overlap_len
4654 } else {
4655 *region_doc_offset
4656 };
4657
4658 let newlines_in_overlap = search_buf[..overlap_len]
4659 .iter()
4660 .filter(|&&b| b == b'\n')
4661 .count();
4662 let mut line_at = running_line.saturating_sub(newlines_in_overlap);
4663 let mut counted_to = 0usize;
4664
4665 for m in regex.find_iter(&search_buf) {
4666 if overlap_len > 0 && m.end() <= overlap_len {
4667 continue;
4668 }
4669 if all_matches.len() >= max_matches {
4670 break;
4671 }
4672
4673 line_at += search_buf[counted_to..m.start()]
4674 .iter()
4675 .filter(|&&b| b == b'\n')
4676 .count();
4677 counted_to = m.start();
4678
4679 let line_start = search_buf[..m.start()]
4680 .iter()
4681 .rposition(|&b| b == b'\n')
4682 .map(|p| p + 1)
4683 .unwrap_or(0);
4684 let line_end = search_buf[m.start()..]
4685 .iter()
4686 .position(|&b| b == b'\n')
4687 .map(|p| m.start() + p)
4688 .unwrap_or(search_buf.len());
4689
4690 let match_doc_offset = buf_doc_offset + m.start();
4691 let column = m.start() - line_start + 1;
4692 let context =
4693 String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();
4694
4695 all_matches.push(SearchMatch {
4696 byte_offset: match_doc_offset,
4697 length: m.end() - m.start(),
4698 line: line_at,
4699 column,
4700 context,
4701 });
4702 }
4703
4704 running_line += data.iter().filter(|&&b| b == b'\n').count();
4705
4706 let tail_start = data.len().saturating_sub(overlap_size);
4707 prev_tail = data[tail_start..].to_vec();
4708 }
4709 }
4710 }
4711
4712 all_matches.truncate(max_matches);
4713 Ok(all_matches)
4714 }
4715}
4716
4717fn search_boundary_overlap(
4723 prev_tail: &[u8],
4724 next_head: &[u8],
4725 doc_offset: usize,
4726 running_line: usize,
4727 regex: &Regex,
4728 max_matches: usize,
4729) -> Vec<SearchMatch> {
4730 let mut buf = Vec::with_capacity(prev_tail.len() + next_head.len());
4731 buf.extend_from_slice(prev_tail);
4732 buf.extend_from_slice(next_head);
4733
4734 let overlap_len = prev_tail.len();
4735 let newlines_before = prev_tail.iter().filter(|&&b| b == b'\n').count();
4736 let mut line_at = running_line.saturating_sub(newlines_before);
4737 let mut counted_to = 0usize;
4738 let mut matches = Vec::new();
4739
4740 for m in regex.find_iter(&buf) {
4741 if m.start() < overlap_len && m.end() > overlap_len {
4743 if matches.len() >= max_matches {
4744 break;
4745 }
4746
4747 line_at += buf[counted_to..m.start()]
4748 .iter()
4749 .filter(|&&b| b == b'\n')
4750 .count();
4751 counted_to = m.start();
4752
4753 let line_start = buf[..m.start()]
4754 .iter()
4755 .rposition(|&b| b == b'\n')
4756 .map(|p| p + 1)
4757 .unwrap_or(0);
4758 let line_end = buf[m.start()..]
4759 .iter()
4760 .position(|&b| b == b'\n')
4761 .map(|p| m.start() + p)
4762 .unwrap_or(buf.len());
4763
4764 let column = m.start() - line_start + 1;
4765 let context = String::from_utf8_lossy(&buf[line_start..line_end]).into_owned();
4766
4767 matches.push(SearchMatch {
4768 byte_offset: doc_offset + m.start(),
4769 length: m.end() - m.start(),
4770 line: line_at,
4771 column,
4772 context,
4773 });
4774 }
4775 }
4776 matches
4777}
4778
4779#[cfg(test)]
4780mod tests {
4781 use crate::model::filesystem::StdFileSystem;
4782 use std::sync::Arc;
4783
4784 fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
4785 Arc::new(StdFileSystem)
4786 }
4787 use super::*;
4788
4789 #[test]
4790 fn test_empty_buffer() {
4791 let buffer = TextBuffer::empty(test_fs());
4792 assert_eq!(buffer.total_bytes(), 0);
4793 assert_eq!(buffer.line_count(), Some(1)); }
4795
4796 #[test]
4797 fn test_line_positions_multiline() {
4798 let buffer = TextBuffer::from_bytes(b"Hello\nNew Line\nWorld!".to_vec(), test_fs());
4799
4800 assert_eq!(buffer.line_count(), Some(3));
4802
4803 assert_eq!(buffer.line_start_offset(0), Some(0)); assert_eq!(buffer.line_start_offset(1), Some(6)); assert_eq!(buffer.line_start_offset(2), Some(15)); assert_eq!(buffer.offset_to_position(0).unwrap().line, 0); assert_eq!(buffer.offset_to_position(5).unwrap().line, 0); assert_eq!(buffer.offset_to_position(6).unwrap().line, 1); assert_eq!(buffer.offset_to_position(14).unwrap().line, 1); assert_eq!(buffer.offset_to_position(15).unwrap().line, 2); assert_eq!(buffer.line_col_to_position(0, 5), 5); assert_eq!(buffer.line_col_to_position(1, 0), 6); assert_eq!(buffer.line_col_to_position(1, 8), 14); assert_eq!(buffer.line_col_to_position(2, 0), 15); }
4821
4822 #[test]
4823 fn test_new_from_content() {
4824 let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4825 assert_eq!(buffer.total_bytes(), 11);
4826 assert_eq!(buffer.line_count(), Some(2));
4827 }
4828
4829 #[test]
4830 fn test_get_all_text() {
4831 let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4832 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld");
4833 }
4834
4835 #[test]
4836 fn test_insert_at_start() {
4837 let mut buffer = TextBuffer::from_bytes(b"world".to_vec(), test_fs());
4838 buffer.insert_bytes(0, b"hello ".to_vec());
4839
4840 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4841 assert_eq!(buffer.total_bytes(), 11);
4842 }
4843
4844 #[test]
4845 fn test_insert_in_middle() {
4846 let mut buffer = TextBuffer::from_bytes(b"helloworld".to_vec(), test_fs());
4847 buffer.insert_bytes(5, b" ".to_vec());
4848
4849 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4850 assert_eq!(buffer.total_bytes(), 11);
4851 }
4852
4853 #[test]
4854 fn test_insert_at_end() {
4855 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4856 buffer.insert_bytes(5, b" world".to_vec());
4857
4858 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4859 assert_eq!(buffer.total_bytes(), 11);
4860 }
4861
4862 #[test]
4863 fn test_insert_with_newlines() {
4864 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4865 buffer.insert_bytes(5, b"\nworld\ntest".to_vec());
4866
4867 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld\ntest");
4868 assert_eq!(buffer.line_count(), Some(3));
4869 }
4870
4871 #[test]
4872 fn test_delete_from_start() {
4873 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4874 buffer.delete_bytes(0, 6);
4875
4876 assert_eq!(buffer.get_all_text().unwrap(), b"world");
4877 assert_eq!(buffer.total_bytes(), 5);
4878 }
4879
4880 #[test]
4881 fn test_delete_from_middle() {
4882 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4883 buffer.delete_bytes(5, 1);
4884
4885 assert_eq!(buffer.get_all_text().unwrap(), b"helloworld");
4886 assert_eq!(buffer.total_bytes(), 10);
4887 }
4888
4889 #[test]
4890 fn test_delete_from_end() {
4891 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4892 buffer.delete_bytes(6, 5);
4893
4894 assert_eq!(buffer.get_all_text().unwrap(), b"hello ");
4895 assert_eq!(buffer.total_bytes(), 6);
4896 }
4897
4898 #[test]
4899 fn test_delete_with_newlines() {
4900 let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4901 buffer.delete_bytes(5, 7); assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
4904 assert_eq!(buffer.line_count(), Some(1));
4905 }
4906
4907 #[test]
4908 fn test_offset_position_conversions() {
4909 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4910
4911 let pos = buffer.offset_to_position(0);
4912 assert_eq!(pos, Some(Position { line: 0, column: 0 }));
4913
4914 let pos = buffer.offset_to_position(6);
4915 assert_eq!(pos, Some(Position { line: 1, column: 0 }));
4916
4917 let offset = buffer.position_to_offset(Position { line: 1, column: 0 });
4918 assert_eq!(offset, 6);
4919 }
4920
4921 #[test]
4922 fn test_insert_at_position() {
4923 let mut buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4924 buffer.insert_at_position(Position { line: 1, column: 0 }, b"beautiful ".to_vec());
4925
4926 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nbeautiful world");
4927 }
4928
4929 #[test]
4930 fn test_delete_range() {
4931 let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4932
4933 let start = Position { line: 0, column: 5 };
4934 let end = Position { line: 2, column: 0 };
4935 buffer.delete_range(start, end);
4936
4937 assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
4938 }
4939
4940 #[test]
4941 fn test_get_line() {
4942 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4943
4944 assert_eq!(buffer.get_line(0), Some(b"hello\n".to_vec()));
4945 assert_eq!(buffer.get_line(1), Some(b"world\n".to_vec()));
4946 assert_eq!(buffer.get_line(2), Some(b"test".to_vec()));
4947 assert_eq!(buffer.get_line(3), None);
4948 }
4949
4950 #[test]
4951 fn test_multiple_operations() {
4952 let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());
4953
4954 buffer.insert_bytes(0, b"start\n".to_vec());
4955 assert_eq!(buffer.line_count(), Some(4));
4956
4957 buffer.delete_bytes(6, 6); assert_eq!(buffer.line_count(), Some(3));
4959
4960 buffer.insert_bytes(6, b"new\n".to_vec());
4961 assert_eq!(buffer.line_count(), Some(4));
4962
4963 let text = buffer.get_all_text().unwrap();
4964 assert_eq!(text, b"start\nnew\nline2\nline3");
4965 }
4966
4967 #[test]
4968 fn test_get_text_range() {
4969 let buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4970
4971 assert_eq!(buffer.get_text_range(0, 5), Some(b"hello".to_vec()));
4972 assert_eq!(buffer.get_text_range(6, 5), Some(b"world".to_vec()));
4973 assert_eq!(buffer.get_text_range(0, 11), Some(b"hello world".to_vec()));
4974 }
4975
4976 #[test]
4977 fn test_empty_operations() {
4978 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4979
4980 buffer.insert_bytes(2, Vec::new());
4981 assert_eq!(buffer.get_all_text().unwrap(), b"hello");
4982
4983 buffer.delete_bytes(2, 0);
4984 assert_eq!(buffer.get_all_text().unwrap(), b"hello");
4985 }
4986
4987 #[test]
4988 fn test_sequential_inserts_at_beginning() {
4989 let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
4991
4992 buffer.delete_bytes(0, 12);
4994 assert_eq!(buffer.get_all_text().unwrap(), b"");
4995
4996 buffer.insert_bytes(0, vec![b'a']);
4998 assert_eq!(buffer.get_all_text().unwrap(), b"a");
4999
5000 buffer.insert_bytes(0, vec![b'b']);
5002 assert_eq!(buffer.get_all_text().unwrap(), b"ba");
5003 }
5004
5005 mod large_file_support {
5008 use super::*;
5009 use crate::model::piece_tree::StringBuffer;
5010 use std::fs::File;
5011 use std::io::Write;
5012 use tempfile::TempDir;
5013
5014 #[test]
5017 fn test_line_feed_count_is_some_for_loaded_buffer() {
5018 let buffer = StringBuffer::new(0, b"hello\nworld\ntest".to_vec());
5019 assert_eq!(buffer.line_feed_count(), Some(2));
5020 }
5021
5022 #[test]
5023 fn test_line_feed_count_is_none_for_unloaded_buffer() {
5024 let temp_dir = TempDir::new().unwrap();
5025 let file_path = temp_dir.path().join("test.txt");
5026
5027 let buffer = StringBuffer::new_unloaded(0, file_path, 0, 100);
5028 assert_eq!(buffer.line_feed_count(), None);
5029 }
5030
5031 #[test]
5032 fn test_line_count_is_some_for_small_buffer() {
5033 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
5034 assert_eq!(buffer.line_count(), Some(3));
5035 }
5036
5037 #[test]
5038 fn test_piece_tree_works_with_none_line_count() {
5039 let buffer = StringBuffer::new_loaded(0, b"hello\nworld".to_vec(), false);
5041 assert_eq!(buffer.line_feed_count(), None);
5042
5043 use crate::model::piece_tree::{BufferLocation, PieceTree};
5045 let tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, None);
5046
5047 assert_eq!(tree.line_count(), None);
5049 }
5050
5051 #[test]
5054 fn test_buffer_data_loaded_variant() {
5055 let data = b"hello world".to_vec();
5056 let buffer = StringBuffer::new_loaded(0, data.clone(), true);
5057
5058 assert!(buffer.is_loaded());
5059 assert_eq!(buffer.get_data(), Some(&data[..]));
5060 assert!(buffer.get_line_starts().is_some());
5061 }
5062
5063 #[test]
5064 fn test_buffer_data_loaded_without_line_starts() {
5065 let data = b"hello\nworld".to_vec();
5066 let buffer = StringBuffer::new_loaded(0, data.clone(), false);
5067
5068 assert!(buffer.is_loaded());
5069 assert_eq!(buffer.get_data(), Some(&data[..]));
5070 assert_eq!(buffer.get_line_starts(), None); }
5072
5073 #[test]
5074 fn test_buffer_data_unloaded_variant() {
5075 let temp_dir = TempDir::new().unwrap();
5076 let file_path = temp_dir.path().join("test.txt");
5077
5078 let buffer = StringBuffer::new_unloaded(0, file_path.clone(), 0, 100);
5079
5080 assert!(!buffer.is_loaded());
5081 assert_eq!(buffer.get_data(), None);
5082 assert_eq!(buffer.get_line_starts(), None);
5083 }
5084
5085 #[test]
5086 fn test_buffer_load_method() {
5087 let temp_dir = TempDir::new().unwrap();
5088 let file_path = temp_dir.path().join("test.txt");
5089
5090 let test_data = b"hello world";
5092 File::create(&file_path)
5093 .unwrap()
5094 .write_all(test_data)
5095 .unwrap();
5096
5097 let mut buffer = StringBuffer::new_unloaded(0, file_path, 0, test_data.len());
5099 assert!(!buffer.is_loaded());
5100
5101 let fs = crate::model::filesystem::StdFileSystem;
5103 buffer.load(&fs).unwrap();
5104
5105 assert!(buffer.is_loaded());
5107 assert_eq!(buffer.get_data(), Some(&test_data[..]));
5108 }
5109
5110 #[test]
5111 fn test_string_buffer_new_vs_new_loaded() {
5112 let data = b"hello\nworld".to_vec();
5113
5114 let buf1 = StringBuffer::new(0, data.clone());
5116 assert!(buf1.is_loaded());
5117 assert!(buf1.get_line_starts().is_some());
5118 assert_eq!(buf1.line_feed_count(), Some(1));
5119
5120 let buf2 = StringBuffer::new_loaded(0, data.clone(), false);
5122 assert!(buf2.is_loaded());
5123 assert_eq!(buf2.get_line_starts(), None);
5124 assert_eq!(buf2.line_feed_count(), None);
5125 }
5126
5127 #[test]
5130 fn test_load_small_file_eager_loading() {
5131 let temp_dir = TempDir::new().unwrap();
5132 let file_path = temp_dir.path().join("small.txt");
5133
5134 let test_data = b"hello\ntest";
5136 File::create(&file_path)
5137 .unwrap()
5138 .write_all(test_data)
5139 .unwrap();
5140
5141 let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
5143
5144 assert!(!buffer.large_file);
5146 assert_eq!(buffer.total_bytes(), test_data.len());
5147 assert_eq!(buffer.line_count(), Some(2)); assert_eq!(buffer.get_all_text().unwrap(), test_data);
5149
5150 assert!(buffer.buffers[0].is_loaded());
5152 }
5153
5154 #[test]
5155 fn test_load_large_file_lazy_loading() {
5156 let temp_dir = TempDir::new().unwrap();
5157 let file_path = temp_dir.path().join("large.txt");
5158
5159 let test_data = b"hello\nworld\ntest";
5161 File::create(&file_path)
5162 .unwrap()
5163 .write_all(test_data)
5164 .unwrap();
5165
5166 let buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
5168
5169 assert!(buffer.large_file);
5171 assert_eq!(buffer.total_bytes(), test_data.len());
5172
5173 assert_eq!(buffer.line_count(), None);
5175
5176 assert!(!buffer.buffers[0].is_loaded());
5178 assert_eq!(buffer.buffers[0].get_data(), None);
5179 }
5180
5181 #[test]
5189 fn test_issue_657_search_on_large_file_unloaded_buffer() {
5190 let temp_dir = TempDir::new().unwrap();
5191 let file_path = temp_dir.path().join("large_search_test.txt");
5192
5193 let test_data = b"line1\nline2\nSEARCH_TARGET\nline4\nline5";
5195 File::create(&file_path)
5196 .unwrap()
5197 .write_all(test_data)
5198 .unwrap();
5199
5200 let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
5202
5203 assert!(buffer.large_file, "Buffer should be in large file mode");
5205 assert!(
5206 !buffer.buffers[0].is_loaded(),
5207 "Buffer should be unloaded initially"
5208 );
5209
5210 assert!(
5213 buffer.to_string().is_none(),
5214 "BUG REPRODUCED: to_string() returns None for unloaded buffer"
5215 );
5216
5217 let total_bytes = buffer.len();
5219 let content = buffer.get_text_range_mut(0, total_bytes).unwrap();
5220 let content_str = String::from_utf8_lossy(&content);
5221
5222 assert!(
5224 content_str.contains("SEARCH_TARGET"),
5225 "FIX WORKS: get_text_range_mut() loaded the buffer and found the search target"
5226 );
5227
5228 assert!(
5230 buffer.to_string().is_some(),
5231 "After get_text_range_mut(), to_string() should work"
5232 );
5233 }
5234
5235 #[test]
5236 fn test_large_file_threshold_boundary() {
5237 let temp_dir = TempDir::new().unwrap();
5238
5239 let file_path = temp_dir.path().join("at_threshold.txt");
5241 let test_data = vec![b'x'; 100];
5242 File::create(&file_path)
5243 .unwrap()
5244 .write_all(&test_data)
5245 .unwrap();
5246
5247 let buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
5249 assert!(buffer.large_file);
5250
5251 let file_path2 = temp_dir.path().join("below_threshold.txt");
5253 let test_data2 = vec![b'x'; 99];
5254 File::create(&file_path2)
5255 .unwrap()
5256 .write_all(&test_data2)
5257 .unwrap();
5258
5259 let buffer2 = TextBuffer::load_from_file(&file_path2, 100, test_fs()).unwrap();
5261 assert!(!buffer2.large_file);
5262 }
5263
5264 #[test]
5265 fn test_large_file_default_threshold() {
5266 let temp_dir = TempDir::new().unwrap();
5267 let file_path = temp_dir.path().join("test.txt");
5268
5269 File::create(&file_path)
5271 .unwrap()
5272 .write_all(b"hello")
5273 .unwrap();
5274
5275 let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
5277
5278 assert!(!buffer.large_file);
5280 }
5281
5282 #[test]
5283 fn test_large_file_has_correct_piece_tree_structure() {
5284 let temp_dir = TempDir::new().unwrap();
5285 let file_path = temp_dir.path().join("large.txt");
5286
5287 let test_data = b"hello world";
5288 File::create(&file_path)
5289 .unwrap()
5290 .write_all(test_data)
5291 .unwrap();
5292
5293 let buffer = TextBuffer::load_from_file(&file_path, 5, test_fs()).unwrap();
5295
5296 assert_eq!(buffer.total_bytes(), test_data.len());
5298
5299 assert_eq!(buffer.buffers.len(), 1);
5301
5302 assert!(!buffer.buffers[0].is_loaded());
5304 }
5305
5306 #[test]
5307 fn test_empty_large_file() {
5308 let temp_dir = TempDir::new().unwrap();
5309 let file_path = temp_dir.path().join("empty.txt");
5310
5311 File::create(&file_path).unwrap();
5313
5314 let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
5316
5317 assert_eq!(buffer.total_bytes(), 0);
5319 assert!(buffer.is_empty());
5320 }
5321
5322 #[test]
5323 fn test_large_file_basic_api_operations() {
5324 let temp_dir = TempDir::new().unwrap();
5325 let file_path = temp_dir.path().join("large_test.txt");
5326
5327 let test_data = b"line1\nline2\nline3\nline4\n";
5329 File::create(&file_path)
5330 .unwrap()
5331 .write_all(test_data)
5332 .unwrap();
5333
5334 let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
5336
5337 assert!(buffer.large_file);
5339 assert_eq!(buffer.line_count(), None); assert_eq!(buffer.total_bytes(), test_data.len());
5343 assert!(!buffer.is_empty());
5344 assert_eq!(buffer.len(), test_data.len());
5345
5346 let range_result = buffer.get_text_range_mut(0, 5).unwrap();
5348 assert_eq!(range_result, b"line1");
5349
5350 let range_result2 = buffer.get_text_range_mut(6, 5).unwrap();
5351 assert_eq!(range_result2, b"line2");
5352
5353 let all_text = buffer.get_all_text().unwrap();
5355 assert_eq!(all_text, test_data);
5356
5357 assert_eq!(buffer.slice_bytes(0..5), b"line1");
5359
5360 buffer.insert_bytes(0, b"prefix_".to_vec());
5363 assert_eq!(buffer.total_bytes(), test_data.len() + 7);
5364 assert!(buffer.is_modified());
5365
5366 let text_after_insert = buffer.get_all_text().unwrap();
5368 assert_eq!(&text_after_insert[0..7], b"prefix_");
5369 assert_eq!(&text_after_insert[7..12], b"line1");
5370
5371 buffer.delete_bytes(0, 7);
5373 assert_eq!(buffer.total_bytes(), test_data.len());
5374
5375 let text_after_delete = buffer.get_all_text().unwrap();
5377 assert_eq!(text_after_delete, test_data);
5378
5379 let end_offset = buffer.total_bytes();
5381 buffer.insert_bytes(end_offset, b"suffix".to_vec());
5382 assert_eq!(buffer.total_bytes(), test_data.len() + 6);
5383
5384 let final_text = buffer.get_all_text().unwrap();
5386 assert!(final_text.ends_with(b"suffix"));
5387 assert_eq!(&final_text[0..test_data.len()], test_data);
5388
5389 let pos = buffer.offset_to_position(0).unwrap();
5393 assert_eq!(pos.column, 0);
5394
5395 let offset = buffer.position_to_offset(Position { line: 0, column: 0 });
5397 assert_eq!(offset, 0);
5398
5399 let replace_result = buffer.replace_range(0..5, "START");
5401 assert!(replace_result);
5402
5403 let text_after_replace = buffer.get_all_text().unwrap();
5404 assert!(text_after_replace.starts_with(b"START"));
5405 }
5406
5407 #[test]
5408 fn test_large_file_chunk_based_loading() {
5409 let temp_dir = TempDir::new().unwrap();
5410 let file_path = temp_dir.path().join("huge.txt");
5411
5412 let chunk_size = LOAD_CHUNK_SIZE; let file_size = chunk_size * 3; let mut file = File::create(&file_path).unwrap();
5419 file.write_all(&vec![b'A'; chunk_size]).unwrap();
5420 file.write_all(&vec![b'B'; chunk_size]).unwrap();
5421 file.write_all(&vec![b'C'; chunk_size]).unwrap();
5422 file.flush().unwrap();
5423
5424 let mut buffer = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();
5426
5427 assert!(buffer.large_file);
5429 assert_eq!(buffer.total_bytes(), file_size);
5430
5431 assert!(!buffer.buffers[0].is_loaded());
5433
5434 let first_chunk_data = buffer.get_text_range_mut(0, 1024).unwrap();
5436 assert_eq!(first_chunk_data.len(), 1024);
5437 assert!(first_chunk_data.iter().all(|&b| b == b'A'));
5438
5439 let second_chunk_data = buffer.get_text_range_mut(chunk_size, 1024).unwrap();
5441 assert_eq!(second_chunk_data.len(), 1024);
5442 assert!(second_chunk_data.iter().all(|&b| b == b'B'));
5443
5444 let third_chunk_data = buffer.get_text_range_mut(chunk_size * 2, 1024).unwrap();
5446 assert_eq!(third_chunk_data.len(), 1024);
5447 assert!(third_chunk_data.iter().all(|&b| b == b'C'));
5448
5449 let cross_chunk_offset = chunk_size - 512;
5452 let cross_chunk_data = buffer.get_text_range_mut(cross_chunk_offset, 1024).unwrap();
5453 assert_eq!(cross_chunk_data.len(), 1024);
5454 assert!(cross_chunk_data[..512].iter().all(|&b| b == b'A'));
5456 assert!(cross_chunk_data[512..].iter().all(|&b| b == b'B'));
5457
5458 assert!(
5461 buffer.buffers.len() > 1,
5462 "Expected multiple buffers after chunk-based loading, got {}",
5463 buffer.buffers.len()
5464 );
5465
5466 buffer.insert_bytes(0, b"PREFIX".to_vec());
5468 assert_eq!(buffer.total_bytes(), file_size + 6);
5469
5470 let after_insert = buffer.get_text_range_mut(0, 6).unwrap();
5471 assert_eq!(after_insert, b"PREFIX");
5472
5473 let after_prefix = buffer.get_text_range_mut(6, 10).unwrap();
5475 assert!(after_prefix.iter().all(|&b| b == b'A'));
5476
5477 let mut buffer2 = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();
5480
5481 let chunk_read_size = 64 * 1024; let mut offset = 0;
5484 while offset < file_size {
5485 let bytes_to_read = chunk_read_size.min(file_size - offset);
5486 let chunk_data = buffer2.get_text_range_mut(offset, bytes_to_read).unwrap();
5487
5488 let first_mb_end = chunk_size;
5490 let second_mb_end = chunk_size * 2;
5491
5492 for (i, &byte) in chunk_data.iter().enumerate() {
5494 let file_offset = offset + i;
5495 let expected = if file_offset < first_mb_end {
5496 b'A'
5497 } else if file_offset < second_mb_end {
5498 b'B'
5499 } else {
5500 b'C'
5501 };
5502 assert_eq!(
5503 byte, expected,
5504 "Mismatch at file offset {}: expected {}, got {}",
5505 file_offset, expected as char, byte as char
5506 );
5507 }
5508
5509 offset += bytes_to_read;
5510 }
5511 }
5512
5513 #[test]
5517 fn test_large_file_incremental_save() {
5518 let temp_dir = TempDir::new().unwrap();
5519 let file_path = temp_dir.path().join("large_save_test.txt");
5520
5521 let chunk_size = 1000; let file_size = chunk_size * 2; let mut file = File::create(&file_path).unwrap();
5526 file.write_all(&vec![b'A'; chunk_size]).unwrap();
5528 file.write_all(&vec![b'B'; chunk_size]).unwrap();
5530 file.flush().unwrap();
5531
5532 let mut buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
5534 assert!(buffer.large_file);
5535 assert_eq!(buffer.total_bytes(), file_size);
5536
5537 let first_bytes = buffer.get_text_range_mut(0, 50).unwrap();
5539 assert!(first_bytes.iter().all(|&b| b == b'A'));
5540
5541 buffer.insert_bytes(0, b"PREFIX_".to_vec());
5543
5544 let save_path = temp_dir.path().join("saved.txt");
5546 buffer.save_to_file(&save_path).unwrap();
5547
5548 let saved_content = std::fs::read(&save_path).unwrap();
5550
5551 assert_eq!(
5553 saved_content.len(),
5554 file_size + 7,
5555 "Saved file should be {} bytes, got {}",
5556 file_size + 7,
5557 saved_content.len()
5558 );
5559
5560 assert_eq!(&saved_content[..7], b"PREFIX_", "Should start with PREFIX_");
5562
5563 assert!(
5565 saved_content[7..100].iter().all(|&b| b == b'A'),
5566 "First chunk after prefix should be A's"
5567 );
5568
5569 let second_chunk_start = 7 + chunk_size;
5571 assert!(
5572 saved_content[second_chunk_start..second_chunk_start + 100]
5573 .iter()
5574 .all(|&b| b == b'B'),
5575 "Second chunk should be B's (was unloaded, should be preserved)"
5576 );
5577 }
5578
5579 #[test]
5581 fn test_large_file_save_with_multiple_edits() {
5582 let temp_dir = TempDir::new().unwrap();
5583 let file_path = temp_dir.path().join("multi_edit.txt");
5584
5585 let mut content = Vec::new();
5587 for i in 0..100 {
5588 content.extend_from_slice(
5589 format!("Line {:04}: padding to make it longer\n", i).as_bytes(),
5590 );
5591 }
5592 let original_len = content.len();
5593 std::fs::write(&file_path, &content).unwrap();
5594
5595 let mut buffer = TextBuffer::load_from_file(&file_path, 500, test_fs()).unwrap();
5597 assert!(
5598 buffer.line_count().is_none(),
5599 "Should be in large file mode"
5600 );
5601
5602 buffer.insert_bytes(0, b"[START]".to_vec());
5604
5605 let mid_offset = original_len / 2;
5607 let _mid_bytes = buffer.get_text_range_mut(mid_offset + 7, 10).unwrap(); buffer.insert_bytes(mid_offset + 7, b"[MIDDLE]".to_vec());
5609
5610 let save_path = temp_dir.path().join("multi_edit_saved.txt");
5612 buffer.save_to_file(&save_path).unwrap();
5613
5614 let saved = std::fs::read_to_string(&save_path).unwrap();
5616
5617 assert!(
5618 saved.starts_with("[START]Line 0000"),
5619 "Should start with our edit"
5620 );
5621 assert!(saved.contains("[MIDDLE]"), "Should contain middle edit");
5622 assert!(saved.contains("Line 0099"), "Should preserve end of file");
5623
5624 let expected_len = original_len + 7 + 8; assert_eq!(
5627 saved.len(),
5628 expected_len,
5629 "Length should be original + edits"
5630 );
5631 }
5632 }
5633
5634 #[test]
5638 fn test_offset_to_position_simple() {
5639 let content = b"a\nb\nc\nd";
5645 let buffer = TextBuffer::from_bytes(content.to_vec(), test_fs());
5646
5647 let pos = buffer
5649 .offset_to_position(0)
5650 .expect("small buffer should have line metadata");
5651 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5652 assert_eq!(pos.column, 0);
5653
5654 let pos = buffer
5655 .offset_to_position(1)
5656 .expect("small buffer should have line metadata");
5657 assert_eq!(pos.line, 0, "Byte 1 (newline) should be on line 0");
5658 assert_eq!(pos.column, 1);
5659
5660 let pos = buffer
5661 .offset_to_position(2)
5662 .expect("small buffer should have line metadata");
5663 assert_eq!(pos.line, 1, "Byte 2 should be on line 1");
5664 assert_eq!(pos.column, 0);
5665
5666 let pos = buffer
5667 .offset_to_position(3)
5668 .expect("small buffer should have line metadata");
5669 assert_eq!(pos.line, 1, "Byte 3 (newline) should be on line 1");
5670 assert_eq!(pos.column, 1);
5671
5672 let pos = buffer
5673 .offset_to_position(4)
5674 .expect("small buffer should have line metadata");
5675 assert_eq!(pos.line, 2, "Byte 4 should be on line 2");
5676 assert_eq!(pos.column, 0);
5677
5678 let pos = buffer
5679 .offset_to_position(6)
5680 .expect("small buffer should have line metadata");
5681 assert_eq!(pos.line, 3, "Byte 6 should be on line 3");
5682 assert_eq!(pos.column, 0);
5683 }
5684
5685 #[test]
5686 fn test_offset_to_position_after_insert() {
5687 let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
5689
5690 buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
5692
5693 let pos = buffer
5699 .offset_to_position(0)
5700 .expect("small buffer should have line metadata");
5701 assert_eq!(pos.line, 0, "Byte 0 should still be on line 0");
5702
5703 let pos = buffer
5704 .offset_to_position(2)
5705 .expect("small buffer should have line metadata");
5706 assert_eq!(
5707 pos.line, 1,
5708 "Byte 2 (start of inserted line) should be on line 1"
5709 );
5710
5711 let pos = buffer
5712 .offset_to_position(4)
5713 .expect("small buffer should have line metadata");
5714 assert_eq!(
5715 pos.line, 2,
5716 "Byte 4 (start of 'b') should be on line 2 after insert"
5717 );
5718 }
5719
5720 #[test]
5721 fn test_offset_to_position_empty_lines() {
5722 let buffer = TextBuffer::from_bytes(b"\n\n\n".to_vec(), test_fs());
5724
5725 let pos = buffer
5731 .offset_to_position(0)
5732 .expect("small buffer should have line metadata");
5733 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5734
5735 let pos = buffer
5736 .offset_to_position(1)
5737 .expect("small buffer should have line metadata");
5738 assert_eq!(pos.line, 1, "Byte 1 should be on line 1");
5739
5740 let pos = buffer
5741 .offset_to_position(2)
5742 .expect("small buffer should have line metadata");
5743 assert_eq!(pos.line, 2, "Byte 2 should be on line 2");
5744
5745 let pos = buffer
5746 .offset_to_position(3)
5747 .expect("small buffer should have line metadata");
5748 assert_eq!(pos.line, 3, "Byte 3 (EOF) should be on line 3");
5749 }
5750
5751 #[test]
5752 fn test_offset_to_position_long_lines() {
5753 let mut content = Vec::new();
5755 content.extend_from_slice(b"aaaaaaaaaa\n"); content.extend_from_slice(b"bbbbbbbbbb\n"); content.extend_from_slice(b"cccccccccc"); let buffer = TextBuffer::from_bytes(content.clone(), test_fs());
5760
5761 let pos = buffer
5763 .offset_to_position(0)
5764 .expect("small buffer should have line metadata");
5765 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5766 assert_eq!(pos.column, 0);
5767
5768 let pos = buffer
5769 .offset_to_position(11)
5770 .expect("small buffer should have line metadata");
5771 assert_eq!(pos.line, 1, "Byte 11 (start of line 1) should be on line 1");
5772 assert_eq!(pos.column, 0);
5773
5774 let pos = buffer
5775 .offset_to_position(22)
5776 .expect("small buffer should have line metadata");
5777 assert_eq!(pos.line, 2, "Byte 22 (start of line 2) should be on line 2");
5778 assert_eq!(pos.column, 0);
5779
5780 let pos = buffer
5782 .offset_to_position(5)
5783 .expect("small buffer should have line metadata");
5784 assert_eq!(pos.line, 0, "Byte 5 should be on line 0");
5785 assert_eq!(pos.column, 5);
5786
5787 let pos = buffer
5788 .offset_to_position(16)
5789 .expect("small buffer should have line metadata");
5790 assert_eq!(pos.line, 1, "Byte 16 should be on line 1");
5791 assert_eq!(pos.column, 5);
5792 }
5793
5794 #[test]
5795 fn test_line_iterator_with_offset_to_position() {
5796 let mut buffer = TextBuffer::from_bytes(b"line0\nline1\nline2\n".to_vec(), test_fs());
5798
5799 for byte_pos in 0..=buffer.len() {
5801 let iter = buffer.line_iterator(byte_pos, 80);
5802 let iter_pos = iter.current_position();
5803 let expected_line = buffer
5804 .offset_to_position(byte_pos)
5805 .expect("small buffer should have line metadata")
5806 .line;
5807 let expected_line_start = buffer.position_to_offset(Position {
5808 line: expected_line,
5809 column: 0,
5810 });
5811
5812 assert_eq!(
5813 iter_pos, expected_line_start,
5814 "LineIterator at byte {} should position at line start {} but got {}",
5815 byte_pos, expected_line_start, iter_pos
5816 );
5817 }
5818 }
5819
#[test]
/// After inserting a line at the start of line 1, the buffer's reported line
/// count must equal the number of `\n` bytes in its content plus one.
fn test_piece_tree_line_count_after_insert() {
    let mut text = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());

    let insert_at = Position { line: 1, column: 0 };
    text.insert_at_position(insert_at, b"x\n".to_vec());

    // Count newlines directly from the raw bytes as the source of truth.
    let all_bytes = text.slice_bytes(0..text.len());
    let newlines = all_bytes.iter().filter(|byte| **byte == b'\n').count();

    assert_eq!(
        text.line_count(),
        Some(newlines + 1),
        "Line count mismatch after insert"
    );
}
5840
#[test]
/// Byte offsets must map to the correct LSP `(line, character)` pair both
/// before and after the buffer has been edited.
///
/// The test renames `val` to `value` in two places (first on line 1, then on
/// line 0) and checks that offsets on line 1 are reported relative to the
/// new, longer line 0.
fn test_position_to_lsp_position_after_modification() {
    // Line 0 is 18 bytes plus '\n', so line 1 starts at byte 19. The body is
    // indented by four spaces, which places `val` at byte 23 (column 4).
    let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
    let mut buffer = TextBuffer::from_bytes(initial.to_vec(), test_fs());

    let (line, char) = buffer.position_to_lsp_position(23);
    assert_eq!(line, 1, "Initial: position 23 should be on line 1");
    assert_eq!(char, 4, "Initial: position 23 should be at char 4");

    // Edit line 1 first so that the edit on line 0 does not shift the
    // offsets used here: replace `val` (columns 4..7) with `value`.
    buffer.delete_range(
        Position { line: 1, column: 4 },
        Position { line: 1, column: 7 },
    );
    buffer.insert_bytes(23, b"value".to_vec());

    // Now replace the parameter name `val` (columns 7..10 of line 0).
    buffer.delete_range(
        Position { line: 0, column: 7 },
        Position {
            line: 0,
            column: 10,
        },
    );
    buffer.insert_bytes(7, b"value".to_vec());

    let content = String::from_utf8_lossy(&buffer.get_all_text().unwrap()).to_string();
    assert_eq!(content, "fn foo(value: i32) {\n    value + 1\n}\n");

    // Line 0 grew by two bytes (20 bytes plus '\n'), so line 1 now starts at
    // byte 21 and `value` sits at byte 25.
    let (line, char) = buffer.position_to_lsp_position(25);
    assert_eq!(
        line, 1,
        "After modification: position 25 should be on line 1"
    );
    assert_eq!(
        char, 4,
        "After modification: position 25 should be at char 4"
    );

    let (line, char) = buffer.position_to_lsp_position(21);
    assert_eq!(line, 1, "Position 21 should be on line 1");
    assert_eq!(char, 0, "Position 21 should be at char 0 (start of line)");
}
5902
#[test]
/// Input whose lines all end in `\r\n` is detected as CRLF.
fn test_detect_crlf() {
    let detected = TextBuffer::detect_line_ending(b"hello\r\nworld\r\n");
    assert_eq!(detected, LineEnding::CRLF);
}
5910
#[test]
/// Input whose lines all end in a bare `\n` is detected as LF.
fn test_detect_lf() {
    let detected = TextBuffer::detect_line_ending(b"hello\nworld\n");
    assert_eq!(detected, LineEnding::LF);
}
5918
#[test]
/// Normalizing CRLF content replaces every `\r\n` with a single `\n`.
fn test_normalize_crlf() {
    let normalized = TextBuffer::normalize_line_endings(b"hello\r\nworld\r\n".to_vec());
    assert_eq!(normalized, b"hello\nworld\n");
}
5925
#[test]
/// Normalizing an empty byte vector yields an empty byte vector.
fn test_normalize_empty() {
    let normalized = TextBuffer::normalize_line_endings(Vec::new());
    assert_eq!(normalized, Vec::<u8>::new());
}
5932
#[test]
/// A file loaded with a threshold smaller than its size is in large-file
/// mode with its first buffer unloaded. In that state `get_all_text()` must
/// return `None`, while `get_text_range_mut()` must still return the full
/// content, including an edit made at offset 0.
fn test_get_all_text_returns_empty_for_unloaded_buffers() {
    use tempfile::TempDir;

    const FILE_LEN: usize = 50_000;
    const EDIT: &[u8] = b"EDITED: ";

    let scratch = TempDir::new().unwrap();
    let path = scratch.path().join("large_test.txt");
    std::fs::write(&path, "X".repeat(FILE_LEN)).unwrap();

    // A 1024-byte threshold forces the 50 000-byte file into large-file mode.
    let mut text = TextBuffer::load_from_file(&path, 1024, test_fs()).unwrap();
    assert!(text.large_file, "Should be in large file mode");
    assert!(!text.buffers[0].is_loaded(), "Buffer should be unloaded");

    text.insert_bytes(0, EDIT.to_vec());

    let snapshot = text.get_all_text();
    assert!(
        snapshot.is_none(),
        "get_all_text() should return None for large files with unloaded regions. \
         Got Some({} bytes) instead of None.",
        snapshot.as_ref().map_or(0, |bytes| bytes.len())
    );

    // The mutable range accessor is expected to return everything.
    let whole_len = text.total_bytes();
    let loaded = text.get_text_range_mut(0, whole_len).unwrap();
    assert_eq!(
        loaded.len(),
        50_000 + 8,
        "get_text_range_mut() should return all content with lazy loading"
    );
    assert!(
        String::from_utf8_lossy(&loaded).starts_with("EDITED: "),
        "Content should start with our edit"
    );
}
5983
5984 mod line_ending_conversion {
5987 use super::*;
5988
#[test]
/// Converting LF-terminated lines to CRLF turns every `\n` into `\r\n`.
fn test_convert_lf_to_crlf() {
    let converted =
        TextBuffer::convert_line_endings_to(b"Line 1\nLine 2\nLine 3\n", LineEnding::CRLF);
    assert_eq!(converted, b"Line 1\r\nLine 2\r\nLine 3\r\n");
}
5995
#[test]
/// Converting CRLF-terminated lines to LF turns every `\r\n` into `\n`.
fn test_convert_crlf_to_lf() {
    let converted =
        TextBuffer::convert_line_endings_to(b"Line 1\r\nLine 2\r\nLine 3\r\n", LineEnding::LF);
    assert_eq!(converted, b"Line 1\nLine 2\nLine 3\n");
}
6002
#[test]
/// Converting lines terminated by a bare `\r` to LF turns every `\r` into `\n`.
fn test_convert_cr_to_lf() {
    let converted =
        TextBuffer::convert_line_endings_to(b"Line 1\rLine 2\rLine 3\r", LineEnding::LF);
    assert_eq!(converted, b"Line 1\nLine 2\nLine 3\n");
}
6009
#[test]
/// Input mixing `\n`, `\r\n` and bare `\r` terminators converts to uniform
/// CRLF, with exactly one `\r\n` per original terminator.
fn test_convert_mixed_to_crlf() {
    let mixed = b"Line 1\nLine 2\r\nLine 3\r";
    let converted = TextBuffer::convert_line_endings_to(mixed, LineEnding::CRLF);
    assert_eq!(converted, b"Line 1\r\nLine 2\r\nLine 3\r\n");
}
6017
#[test]
/// Converting LF content to LF returns the input bytes unchanged.
fn test_convert_lf_to_lf_is_noop() {
    let source = b"Line 1\nLine 2\nLine 3\n";
    let converted = TextBuffer::convert_line_endings_to(source, LineEnding::LF);
    assert_eq!(converted, source.to_vec());
}
6024
#[test]
/// Converting empty input produces empty output.
fn test_convert_empty_content() {
    let converted = TextBuffer::convert_line_endings_to(b"", LineEnding::CRLF);
    assert_eq!(converted, b"".to_vec());
}
6031
#[test]
/// Input without any line terminator is returned unchanged by a CRLF
/// conversion.
fn test_convert_no_line_endings() {
    let converted = TextBuffer::convert_line_endings_to(b"No line endings here", LineEnding::CRLF);
    assert_eq!(converted, b"No line endings here".to_vec());
}
6038
#[test]
/// A buffer built from bytes starts unmodified; `set_line_ending` flips it
/// to modified.
fn test_set_line_ending_marks_modified() {
    let mut text = TextBuffer::from_bytes(b"Hello\nWorld\n".to_vec(), test_fs());
    let modified_before = text.is_modified();
    assert!(!modified_before);

    text.set_line_ending(LineEnding::CRLF);
    let modified_after = text.is_modified();
    assert!(modified_after);
}
6047
#[test]
/// `set_default_line_ending` changes the buffer's line ending without
/// marking an empty buffer as modified.
fn test_set_default_line_ending_does_not_mark_modified() {
    let mut text = TextBuffer::empty(test_fs());
    let modified_before = text.is_modified();
    assert!(!modified_before);

    text.set_default_line_ending(LineEnding::CRLF);

    let modified_after = text.is_modified();
    assert!(!modified_after);
    assert_eq!(text.line_ending(), LineEnding::CRLF);
}
6057
#[test]
/// Loading an LF file, switching the buffer to CRLF and saving must write
/// CRLF terminators to disk.
fn test_save_to_file_converts_lf_to_crlf() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let path = scratch.path().join("test_lf_to_crlf.txt");
    std::fs::write(&path, b"Line 1\nLine 2\nLine 3\n").unwrap();

    let mut text =
        TextBuffer::load_from_file(&path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs()).unwrap();
    // The line ending is detected from the file content on load.
    assert_eq!(text.line_ending(), LineEnding::LF);

    text.set_line_ending(LineEnding::CRLF);
    assert_eq!(text.line_ending(), LineEnding::CRLF);
    assert!(text.is_modified());

    text.save_to_file(&path).unwrap();

    // Read the raw bytes back to verify what actually reached the disk.
    let on_disk = std::fs::read(&path).unwrap();
    assert_eq!(&on_disk, b"Line 1\r\nLine 2\r\nLine 3\r\n");
}
6087
#[test]
/// Loading a CRLF file, switching the buffer to LF and saving must write
/// LF terminators to disk.
fn test_save_to_file_converts_crlf_to_lf() {
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let path = scratch.path().join("test_crlf_to_lf.txt");
    std::fs::write(&path, b"Line 1\r\nLine 2\r\nLine 3\r\n").unwrap();

    let mut text =
        TextBuffer::load_from_file(&path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs()).unwrap();
    // The line ending is detected from the file content on load.
    assert_eq!(text.line_ending(), LineEnding::CRLF);

    text.set_line_ending(LineEnding::LF);
    assert_eq!(text.line_ending(), LineEnding::LF);
    assert!(text.is_modified());

    text.save_to_file(&path).unwrap();

    // Read the raw bytes back to verify what actually reached the disk.
    let on_disk = std::fs::read(&path).unwrap();
    assert_eq!(&on_disk, b"Line 1\nLine 2\nLine 3\n");
}
6117
#[test]
#[cfg(unix)]
/// Saving over an existing file inside a read-only directory must fail with
/// a [`SudoSaveRequired`] error whose `dest_path` is the target file and
/// whose `temp_path` exists on disk.
///
/// The test is skipped when running as root, because root is not subject to
/// the directory permission check it relies on.
fn test_save_to_unwritable_file() -> anyhow::Result<()> {
    // SAFETY: `getuid` takes no arguments, touches no memory owned by this
    // program, and is specified by POSIX to always succeed.
    if unsafe { libc::getuid() } == 0 {
        eprintln!("Skipping test: root bypasses file permission checks");
        return Ok(());
    }
    use std::fs::Permissions;
    use std::os::unix::fs::PermissionsExt;
    use tempfile::TempDir;

    let temp_dir = TempDir::new()?;
    let unwritable_dir = temp_dir.path().join("unwritable_dir");
    std::fs::create_dir(&unwritable_dir)?;

    let file_path = unwritable_dir.join("unwritable.txt");
    std::fs::write(&file_path, "original content")?;

    // Remove write permission from the directory (r-xr-xr-x).
    std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;

    let mut buffer = TextBuffer::from_bytes(b"new content".to_vec(), test_fs());
    let result = buffer.save_to_file(&file_path);

    // Give the directory its write bit back before any assertion can panic.
    // Otherwise the file inside it cannot be unlinked when `temp_dir` is
    // dropped and the temporary tree is left behind.
    std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o755))?;

    match result {
        Err(e) => {
            if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
                assert_eq!(sudo_err.dest_path, file_path);
                assert!(sudo_err.temp_path.exists());
                // Best-effort cleanup of the staged temp file; a failure here
                // must not fail the test.
                drop(std::fs::remove_file(&sudo_err.temp_path));
            } else {
                panic!("Expected SudoSaveRequired error, got: {:?}", e);
            }
        }
        Ok(_) => panic!("Expected error, but save succeeded"),
    }

    Ok(())
}
6161
#[test]
#[cfg(unix)]
/// Saving a new file into a read-only directory must fail with a
/// [`SudoSaveRequired`] error whose `dest_path` is the target file and whose
/// `temp_path` exists under the system temp directory.
///
/// The test is skipped when running as root.
fn test_save_to_unwritable_directory() -> anyhow::Result<()> {
    if unsafe { libc::getuid() } == 0 {
        eprintln!("Skipping test: root bypasses file permission checks");
        return Ok(());
    }
    use std::fs::Permissions;
    use std::os::unix::fs::PermissionsExt;
    use tempfile::TempDir;

    let scratch = TempDir::new().unwrap();
    let locked_dir = scratch.path().join("unwritable_dir");
    std::fs::create_dir(&locked_dir)?;

    // The target file is never created, so the directory stays empty.
    let target = locked_dir.join("test.txt");

    std::fs::set_permissions(&locked_dir, Permissions::from_mode(0o555))?;

    let mut text = TextBuffer::from_bytes(b"content".to_vec(), test_fs());
    let outcome = text.save_to_file(&target);

    match outcome {
        Ok(_) => panic!("Expected error, but save succeeded"),
        Err(e) => match e.downcast_ref::<SudoSaveRequired>() {
            Some(sudo_err) => {
                assert_eq!(sudo_err.dest_path, target);
                assert!(sudo_err.temp_path.exists());
                assert!(sudo_err.temp_path.starts_with(std::env::temp_dir()));
                // Best-effort removal of the staged temp file.
                drop(std::fs::remove_file(&sudo_err.temp_path));
            }
            None => panic!("Expected SudoSaveRequired error, got: {:?}", e),
        },
    }

    Ok(())
}
6205 }
6206
6207 mod large_file_encoding_tests {
6208 use super::*;
6209
#[test]
/// The `Display` output of a confirmation for a 150 MiB Shift-JIS file
/// mentions the size in MB, the encoding's display name and the
/// "requires full load" wording.
fn test_large_file_encoding_confirmation_display() {
    let prompt = LargeFileEncodingConfirmation {
        path: PathBuf::from("/test/file.txt"),
        file_size: 150 * 1024 * 1024,
        encoding: Encoding::ShiftJis,
    };

    let display = prompt.to_string();

    assert!(display.contains("150 MB"), "Display: {}", display);
    assert!(display.contains("Shift-JIS"), "Display: {}", display);
    assert!(
        display.contains("requires full load"),
        "Display: {}",
        display
    );
}
6227
#[test]
/// Two confirmations with identical fields compare equal; changing only the
/// path makes them unequal.
fn test_large_file_encoding_confirmation_equality() {
    // All three values share size and encoding and differ at most in path.
    let confirmation_for = |path: &str| LargeFileEncodingConfirmation {
        path: PathBuf::from(path),
        file_size: 100 * 1024 * 1024,
        encoding: Encoding::Gb18030,
    };

    let a = confirmation_for("/test/file.txt");
    let b = confirmation_for("/test/file.txt");
    let c = confirmation_for("/test/other.txt");

    assert_eq!(a, b);
    assert_ne!(a, c);
}
6249
#[test]
/// Pins which encodings report `requires_full_file_load()`: the UTF-8,
/// ASCII, Latin-1, Windows-125x and UTF-16 variants do not; GB18030, GBK,
/// Shift-JIS and EUC-KR do.
fn test_encoding_requires_confirmation() {
    let needs_full_load = |encoding: Encoding| encoding.requires_full_file_load();

    // Encodings that do not require a full load.
    assert!(!needs_full_load(Encoding::Utf8));
    assert!(!needs_full_load(Encoding::Utf8Bom));
    assert!(!needs_full_load(Encoding::Ascii));
    assert!(!needs_full_load(Encoding::Latin1));
    assert!(!needs_full_load(Encoding::Windows1252));
    assert!(!needs_full_load(Encoding::Windows1250));
    assert!(!needs_full_load(Encoding::Windows1251));
    assert!(!needs_full_load(Encoding::Utf16Le));
    assert!(!needs_full_load(Encoding::Utf16Be));

    // Encodings that do require a full load.
    assert!(needs_full_load(Encoding::Gb18030));
    assert!(needs_full_load(Encoding::Gbk));
    assert!(needs_full_load(Encoding::ShiftJis));
    assert!(needs_full_load(Encoding::EucKr));
}
6269
#[test]
/// An 11-byte file never triggers the large-file encoding confirmation:
/// `check_large_file_encoding` returns `Ok(None)`.
fn test_check_large_file_encoding_small_file() {
    use tempfile::NamedTempFile;

    let small_file = NamedTempFile::new().unwrap();
    let path = small_file.path();
    std::fs::write(path, b"hello world").unwrap();

    let confirmation = TextBuffer::check_large_file_encoding(path, test_fs()).unwrap();

    assert!(
        confirmation.is_none(),
        "Small files should not require confirmation"
    );
}
6284
#[test]
/// A `LargeFileEncodingConfirmation` converted into an `anyhow::Error` can
/// be recovered with `downcast_ref`, with its fields intact.
fn test_large_file_encoding_error_downcast() {
    let confirmation = LargeFileEncodingConfirmation {
        path: PathBuf::from("/test/file.txt"),
        file_size: 200 * 1024 * 1024,
        encoding: Encoding::EucKr,
    };

    // The value is not needed afterwards, so it is moved into the error
    // rather than cloned.
    let error: anyhow::Error = confirmation.into();

    let downcast = error
        .downcast_ref::<LargeFileEncodingConfirmation>()
        .expect("error should downcast to LargeFileEncodingConfirmation");
    assert_eq!(downcast.encoding, Encoding::EucKr);
}
6299 }
6300
6301 mod rebuild_pristine_saved_root_tests {
6302 use super::*;
6303 use crate::model::piece_tree::BufferLocation;
6304 use std::sync::Arc;
6305
6306 fn large_file_buffer(content: &[u8]) -> TextBuffer {
6309 let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
6310 Arc::new(crate::model::filesystem::StdFileSystem);
6311 let bytes = content.len();
6312 let buffer =
6313 crate::model::piece_tree::StringBuffer::new_loaded(0, content.to_vec(), false);
6314 let piece_tree = if bytes > 0 {
6315 crate::model::piece_tree::PieceTree::new(BufferLocation::Stored(0), 0, bytes, None)
6316 } else {
6317 crate::model::piece_tree::PieceTree::empty()
6318 };
6319 let saved_root = piece_tree.root();
6320 TextBuffer {
6321 fs,
6322 piece_tree,
6323 saved_root,
6324 buffers: vec![buffer],
6325 next_buffer_id: 1,
6326 file_path: None,
6327 modified: false,
6328 recovery_pending: false,
6329 large_file: true,
6330 line_feeds_scanned: false,
6331 is_binary: false,
6332 line_ending: LineEnding::LF,
6333 original_line_ending: LineEnding::LF,
6334 encoding: Encoding::Utf8,
6335 original_encoding: Encoding::Utf8,
6336 saved_file_size: Some(bytes),
6337 version: 0,
6338 config: BufferConfig::default(),
6339 }
6340 }
6341
6342 fn scan_line_feeds(buf: &mut TextBuffer) -> Vec<(usize, usize)> {
6344 buf.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
6345 let leaves = buf.piece_tree.get_leaves();
6346 let mut updates = Vec::new();
6347 for (idx, leaf) in leaves.iter().enumerate() {
6348 if leaf.line_feed_cnt.is_some() {
6349 continue;
6350 }
6351 let count = buf.scan_leaf(leaf).unwrap();
6352 updates.push((idx, count));
6353 }
6354 updates
6355 }
6356
/// Produces exactly `size` bytes by repeating a fixed 71-byte line
/// (70 alphanumeric characters followed by `\n`), truncating the final
/// repetition if needed.
fn make_content(size: usize) -> Vec<u8> {
    const LINE: &[u8] = b"abcdefghij0123456789ABCDEFGHIJ0123456789abcdefghij0123456789ABCDEFGHIJ\n";
    LINE.iter().copied().cycle().take(size).collect()
}
6368
#[test]
/// With no edits, rebuilding from the scan results must leave `saved_root`
/// and the live tree root pointing at the same allocation, report an equal
/// diff, expose an exact line count and keep the content intact.
fn test_no_edits_arc_ptr_eq() {
    let original = make_content(2 * 1024 * 1024);
    let newline_total = original.iter().filter(|byte| **byte == b'\n').count();
    let mut buf = large_file_buffer(&original);

    // Before the scan there is no line count.
    assert!(buf.line_count().is_none());

    let updates = scan_line_feeds(&mut buf);
    buf.rebuild_with_pristine_saved_root(&updates);

    assert_eq!(buf.line_count(), Some(newline_total + 1));

    let live_root = buf.piece_tree.root();
    assert!(Arc::ptr_eq(&buf.saved_root, &live_root));
    let diff = buf.diff_since_saved();
    assert!(diff.equal);
    assert!(buf.line_feeds_scanned);
    assert_eq!(buf.get_all_text().unwrap(), original);
}
6391
#[test]
/// An insertion made between the scan and the rebuild must survive the
/// rebuild: content, line count and a non-equal, non-empty diff are all
/// checked against an independently built expectation.
fn test_single_insertion() {
    let original = make_content(2 * 1024 * 1024);
    let mut buf = large_file_buffer(&original);
    let updates = scan_line_feeds(&mut buf);

    let at = 1_000_000;
    let payload = b"INSERTED_TEXT\n";
    buf.insert_bytes(at, payload.to_vec());

    buf.rebuild_with_pristine_saved_root(&updates);

    // Expected content: original with the payload spliced in at `at`.
    let expected = [&original[..at], &payload[..], &original[at..]].concat();
    assert_eq!(buf.get_all_text().unwrap(), expected);

    let newline_total = expected.iter().filter(|byte| **byte == b'\n').count();
    assert_eq!(buf.line_count(), Some(newline_total + 1));

    let diff = buf.diff_since_saved();
    assert!(!diff.equal);
    assert!(!diff.byte_ranges.is_empty());
}
6419
#[test]
/// After a rebuild, an insertion 100 bytes before the end of a 4 MiB buffer
/// must produce a diff whose first byte range starts within the last 200
/// bytes of the original content.
fn test_diff_byte_ranges_are_document_absolute_after_eof_insert() {
    let content = make_content(4 * 1024 * 1024);
    let mut buf = large_file_buffer(&content);
    let updates = scan_line_feeds(&mut buf);
    buf.rebuild_with_pristine_saved_root(&updates);

    let insert_offset = content.len() - 100;
    buf.insert_bytes(insert_offset, b"HELLO".to_vec());

    let diff = buf.diff_since_saved();
    assert!(!diff.equal, "diff should detect the insertion");
    assert!(
        !diff.byte_ranges.is_empty(),
        "byte_ranges should not be empty"
    );

    // Lower bound for where the first changed range may start.
    let near_eof = content.len() - 200;
    let first_range = &diff.byte_ranges[0];
    assert!(
        first_range.start >= near_eof,
        "byte_ranges should be document-absolute (near EOF): got {:?}, expected near {}",
        first_range,
        insert_offset,
    );
}
6451
#[test]
/// A deletion made between the scan and the rebuild must survive the
/// rebuild, and the diff against the saved root must be non-equal.
fn test_single_deletion() {
    let original = make_content(2 * 1024 * 1024);
    let mut buf = large_file_buffer(&original);
    let updates = scan_line_feeds(&mut buf);

    let start = 500_000;
    let len = 1000;
    buf.delete_bytes(start, len);

    buf.rebuild_with_pristine_saved_root(&updates);

    // Expected content: original with `len` bytes removed at `start`.
    let expected = [&original[..start], &original[start + len..]].concat();
    assert_eq!(buf.get_all_text().unwrap(), expected);

    let diff = buf.diff_since_saved();
    assert!(!diff.equal);
}
6472
#[test]
/// A deletion followed by an insertion, both made between the scan and the
/// rebuild, must be preserved by the rebuild and produce a non-equal diff.
fn test_insert_and_delete() {
    let original = make_content(2 * 1024 * 1024);
    let mut buf = large_file_buffer(&original);
    let updates = scan_line_feeds(&mut buf);

    let del_start = 100_000;
    let del_len = 500;
    buf.delete_bytes(del_start, del_len);

    // The insertion offset is in post-deletion coordinates.
    let insert_at = 1_500_000;
    let payload = b"NEW_CONTENT\n";
    buf.insert_bytes(insert_at, payload.to_vec());

    buf.rebuild_with_pristine_saved_root(&updates);

    // Mirror both edits, in the same order, on a plain byte vector.
    let after_delete = [&original[..del_start], &original[del_start + del_len..]].concat();
    let expected = [
        &after_delete[..insert_at],
        &payload[..],
        &after_delete[insert_at..],
    ]
    .concat();
    assert_eq!(buf.get_all_text().unwrap(), expected);

    let diff = buf.diff_since_saved();
    assert!(!diff.equal);
}
6499
#[test]
/// Four edits (two deletions, two insertions) at scattered offsets, made
/// between the scan and the rebuild, must all be preserved by the rebuild.
/// Each buffer edit is mirrored immediately on a plain byte vector.
fn test_multiple_scattered_edits() {
    let original = make_content(3 * 1024 * 1024);
    let mut buf = large_file_buffer(&original);
    let updates = scan_line_feeds(&mut buf);
    let mut mirror = original.clone();

    let first_insert: &[u8] = b"AAAA\n";
    let second_insert: &[u8] = b"BBBB\n";

    // Edit 1: delete 200 bytes at 100 000.
    buf.delete_bytes(100_000, 200);
    mirror.drain(100_000..100_200);

    // Edit 2: insert at 500 000.
    buf.insert_bytes(500_000, first_insert.to_vec());
    mirror.splice(500_000..500_000, first_insert.iter().copied());

    // Edit 3: delete 300 bytes at 2 000 000.
    buf.delete_bytes(2_000_000, 300);
    mirror.drain(2_000_000..2_000_300);

    // Edit 4: insert at 1 000 000.
    buf.insert_bytes(1_000_000, second_insert.to_vec());
    mirror.splice(1_000_000..1_000_000, second_insert.iter().copied());

    buf.rebuild_with_pristine_saved_root(&updates);

    assert_eq!(buf.get_all_text().unwrap(), mirror);
    let diff = buf.diff_since_saved();
    assert!(!diff.equal);
}
6530
#[test]
/// The rebuild must not change the buffer's text: the full content read
/// immediately before and immediately after it must be identical.
fn test_content_preserved_after_rebuild() {
    let original = make_content(2 * 1024 * 1024);
    let mut buf = large_file_buffer(&original);
    let updates = scan_line_feeds(&mut buf);

    // Make the tree differ from the scanned one before rebuilding.
    buf.insert_bytes(0, b"HEADER\n".to_vec());
    buf.delete_bytes(1_000_000, 500);

    let before_rebuild = buf.get_all_text().unwrap();
    buf.rebuild_with_pristine_saved_root(&updates);
    let after_rebuild = buf.get_all_text().unwrap();

    assert_eq!(before_rebuild, after_rebuild);
}
6548
6549 fn large_file_buffer_unloaded(path: &std::path::Path, file_size: usize) -> TextBuffer {
6552 let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
6553 Arc::new(crate::model::filesystem::StdFileSystem);
6554 let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
6555 0,
6556 path.to_path_buf(),
6557 0,
6558 file_size,
6559 );
6560 let piece_tree = if file_size > 0 {
6561 crate::model::piece_tree::PieceTree::new(
6562 BufferLocation::Stored(0),
6563 0,
6564 file_size,
6565 None,
6566 )
6567 } else {
6568 crate::model::piece_tree::PieceTree::empty()
6569 };
6570 let saved_root = piece_tree.root();
6571 TextBuffer {
6572 fs,
6573 piece_tree,
6574 saved_root,
6575 buffers: vec![buffer],
6576 next_buffer_id: 1,
6577 file_path: Some(path.to_path_buf()),
6578 modified: false,
6579 recovery_pending: false,
6580 large_file: true,
6581 line_feeds_scanned: false,
6582 is_binary: false,
6583 line_ending: LineEnding::LF,
6584 original_line_ending: LineEnding::LF,
6585 encoding: Encoding::Utf8,
6586 original_encoding: Encoding::Utf8,
6587 saved_file_size: Some(file_size),
6588 version: 0,
6589 config: BufferConfig::default(),
6590 }
6591 }
6592
#[test]
/// For a buffer backed by an unloaded on-disk file, the line count is `None`
/// before the scan and exact (newlines + 1) after scan and rebuild, and
/// `line_feeds_scanned` is set.
fn test_unloaded_buffer_no_edits_line_count() {
    let original = make_content(2 * 1024 * 1024);
    let newline_total = original.iter().filter(|byte| **byte == b'\n').count();

    // Keep the temp file alive for the whole test; the buffer reads from it.
    let backing = tempfile::NamedTempFile::new().unwrap();
    std::fs::write(backing.path(), &original).unwrap();
    let mut buf = large_file_buffer_unloaded(backing.path(), original.len());

    assert!(
        buf.line_count().is_none(),
        "before scan, line_count should be None"
    );

    let updates = scan_line_feeds(&mut buf);
    buf.rebuild_with_pristine_saved_root(&updates);

    let counted = buf.line_count();
    assert_eq!(
        counted,
        Some(newline_total + 1),
        "after rebuild, line_count must be exact"
    );
    assert!(buf.line_feeds_scanned);
}
6617
#[test]
/// For a buffer backed by an unloaded on-disk file, an insertion made
/// between the scan and the rebuild must be reflected in an exact line
/// count after the rebuild.
fn test_unloaded_buffer_with_edits_line_count() {
    let original = make_content(2 * 1024 * 1024);

    // Keep the temp file alive for the whole test; the buffer reads from it.
    let backing = tempfile::NamedTempFile::new().unwrap();
    std::fs::write(backing.path(), &original).unwrap();
    let mut buf = large_file_buffer_unloaded(backing.path(), original.len());

    let updates = scan_line_feeds(&mut buf);

    let at = 1_000_000;
    let payload = b"INSERTED\n";
    buf.insert_bytes(at, payload.to_vec());

    buf.rebuild_with_pristine_saved_root(&updates);

    // Expected content: original with the payload spliced in at `at`.
    let expected = [&original[..at], &payload[..], &original[at..]].concat();
    let newline_total = expected.iter().filter(|byte| **byte == b'\n').count();

    assert_eq!(
        buf.line_count(),
        Some(newline_total + 1),
        "after rebuild with edits, line_count must be exact"
    );
    assert!(buf.line_feeds_scanned);
}
6645
#[test]
/// After a rebuild of a 32 MiB loaded buffer with a single small insertion,
/// the diff against the saved root must visit fewer nodes than the tree has
/// leaves.
fn test_diff_efficiency_after_rebuild() {
    let original = make_content(32 * 1024 * 1024);
    let mut buf = large_file_buffer(&original);

    let updates = scan_line_feeds(&mut buf);

    buf.insert_bytes(1_000_000, b"HELLO".to_vec());

    buf.rebuild_with_pristine_saved_root(&updates);

    let diff = buf.diff_since_saved();
    assert!(!diff.equal);

    let leaf_total = buf.piece_tree.get_leaves().len();
    let visited = diff.nodes_visited;
    assert!(
        visited < leaf_total,
        "diff visited {} nodes but tree has {} leaves — \
         Arc::ptr_eq short-circuiting is not working",
        visited,
        leaf_total,
    );
}
6680
#[test]
/// After a scan and rebuild of an unloaded three-chunk file, reading a 4096
/// byte window must not issue any `read_range` call longer than
/// `LOAD_CHUNK_SIZE`.
///
/// The test wraps `StdFileSystem` in a `TrackingFs` that records the largest
/// `len` ever passed to `read_range`, resets that maximum after the rebuild,
/// and then checks it after one more window read.
fn test_viewport_load_after_rebuild_does_not_load_entire_file() {
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Filesystem wrapper that forwards calls to `inner` and records the
    /// maximum `len` seen by `read_range`.
    struct TrackingFs {
        inner: crate::model::filesystem::StdFileSystem,
        /// Largest `len` passed to `read_range` so far; shared with the test
        /// body so it can be reset and inspected.
        max_read_range_len: Arc<AtomicUsize>,
    }

    // Every method except `read_range` and `search_file` is a plain
    // delegation to `inner`.
    impl crate::model::filesystem::FileSystem for TrackingFs {
        fn read_file(&self, path: &Path) -> std::io::Result<Vec<u8>> {
            self.inner.read_file(path)
        }
        fn read_range(
            &self,
            path: &Path,
            offset: u64,
            len: usize,
        ) -> std::io::Result<Vec<u8>> {
            // Record the request size before delegating.
            self.max_read_range_len.fetch_max(len, Ordering::SeqCst);
            self.inner.read_range(path, offset, len)
        }
        fn write_file(&self, path: &Path, data: &[u8]) -> std::io::Result<()> {
            self.inner.write_file(path, data)
        }
        fn create_file(
            &self,
            path: &Path,
        ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
        {
            self.inner.create_file(path)
        }
        fn open_file(
            &self,
            path: &Path,
        ) -> std::io::Result<Box<dyn crate::model::filesystem::FileReader>>
        {
            self.inner.open_file(path)
        }
        fn open_file_for_write(
            &self,
            path: &Path,
        ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
        {
            self.inner.open_file_for_write(path)
        }
        fn open_file_for_append(
            &self,
            path: &Path,
        ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
        {
            self.inner.open_file_for_append(path)
        }
        fn set_file_length(&self, path: &Path, len: u64) -> std::io::Result<()> {
            self.inner.set_file_length(path, len)
        }
        fn rename(&self, from: &Path, to: &Path) -> std::io::Result<()> {
            self.inner.rename(from, to)
        }
        fn copy(&self, from: &Path, to: &Path) -> std::io::Result<u64> {
            self.inner.copy(from, to)
        }
        fn remove_file(&self, path: &Path) -> std::io::Result<()> {
            self.inner.remove_file(path)
        }
        fn remove_dir(&self, path: &Path) -> std::io::Result<()> {
            self.inner.remove_dir(path)
        }
        fn metadata(
            &self,
            path: &Path,
        ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
            self.inner.metadata(path)
        }
        fn symlink_metadata(
            &self,
            path: &Path,
        ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
            self.inner.symlink_metadata(path)
        }
        fn is_dir(&self, path: &Path) -> std::io::Result<bool> {
            self.inner.is_dir(path)
        }
        fn is_file(&self, path: &Path) -> std::io::Result<bool> {
            self.inner.is_file(path)
        }
        fn set_permissions(
            &self,
            path: &Path,
            permissions: &crate::model::filesystem::FilePermissions,
        ) -> std::io::Result<()> {
            self.inner.set_permissions(path, permissions)
        }
        fn is_owner(&self, path: &Path) -> bool {
            self.inner.is_owner(path)
        }
        fn read_dir(
            &self,
            path: &Path,
        ) -> std::io::Result<Vec<crate::model::filesystem::DirEntry>> {
            self.inner.read_dir(path)
        }
        fn create_dir(&self, path: &Path) -> std::io::Result<()> {
            self.inner.create_dir(path)
        }
        fn create_dir_all(&self, path: &Path) -> std::io::Result<()> {
            self.inner.create_dir_all(path)
        }
        fn canonicalize(&self, path: &Path) -> std::io::Result<PathBuf> {
            self.inner.canonicalize(path)
        }
        fn current_uid(&self) -> u32 {
            self.inner.current_uid()
        }
        fn sudo_write(
            &self,
            path: &Path,
            data: &[u8],
            mode: u32,
            uid: u32,
            gid: u32,
        ) -> std::io::Result<()> {
            self.inner.sudo_write(path, data, mode, uid, gid)
        }
        fn search_file(
            &self,
            path: &Path,
            pattern: &str,
            opts: &crate::model::filesystem::FileSearchOptions,
            cursor: &mut crate::model::filesystem::FileSearchCursor,
        ) -> std::io::Result<Vec<SearchMatch>> {
            // Uses the shared default search routine over `inner` instead of
            // calling `inner.search_file` directly.
            crate::model::filesystem::default_search_file(
                &self.inner,
                path,
                pattern,
                opts,
                cursor,
            )
        }
        fn walk_files(
            &self,
            root: &Path,
            skip_dirs: &[&str],
            cancel: &std::sync::atomic::AtomicBool,
            on_file: &mut dyn FnMut(&Path, &str) -> bool,
        ) -> std::io::Result<()> {
            self.inner.walk_files(root, skip_dirs, cancel, on_file)
        }
    }

    // Three chunks' worth of content, written to a real temp file.
    let file_size = LOAD_CHUNK_SIZE * 3;
    let content = make_content(file_size);

    let tmp = tempfile::NamedTempFile::new().unwrap();
    std::fs::write(tmp.path(), &content).unwrap();

    // Shared counter: one handle lives in the filesystem, one stays here.
    let max_read = Arc::new(AtomicUsize::new(0));
    let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
        Arc::new(TrackingFs {
            inner: crate::model::filesystem::StdFileSystem,
            max_read_range_len: max_read.clone(),
        });

    // Build the buffer by hand (rather than via a helper) so that it uses
    // the tracking filesystem. The single stored buffer is unloaded and
    // covers the whole file.
    let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
        0,
        tmp.path().to_path_buf(),
        0,
        file_size,
    );
    let piece_tree = PieceTree::new(BufferLocation::Stored(0), 0, file_size, None);
    let saved_root = piece_tree.root();
    let mut buf = TextBuffer {
        fs,
        piece_tree,
        saved_root,
        buffers: vec![buffer],
        next_buffer_id: 1,
        file_path: Some(tmp.path().to_path_buf()),
        modified: false,
        recovery_pending: false,
        large_file: true,
        line_feeds_scanned: false,
        is_binary: false,
        line_ending: LineEnding::LF,
        original_line_ending: LineEnding::LF,
        encoding: Encoding::Utf8,
        original_encoding: Encoding::Utf8,
        saved_file_size: Some(file_size),
        version: 0,
        config: BufferConfig::default(),
    };

    // Read a 4096-byte window that starts 100 bytes into the second chunk,
    // before the scan happens.
    let viewport_offset = LOAD_CHUNK_SIZE + 100;
    buf.get_text_range_mut(viewport_offset, 4096).unwrap();

    let updates = scan_line_feeds(&mut buf);
    buf.rebuild_with_pristine_saved_root(&updates);

    // Forget every read made so far; only reads after the rebuild count.
    max_read.store(0, Ordering::SeqCst);

    buf.get_text_range_mut(viewport_offset, 4096).unwrap();

    let largest_read = max_read.load(Ordering::SeqCst);
    assert!(
        largest_read <= LOAD_CHUNK_SIZE,
        "After rebuild, loading a viewport triggered a read of {} bytes \
         (file_size={}). This means the entire Stored buffer is being \
         loaded instead of just the needed chunk.",
        largest_read,
        file_size,
    );
}
6906
#[test]
/// After a scan and rebuild of an unloaded three-chunk file, reading a 4096
/// byte window from the middle of the second chunk must leave the piece
/// tree's line count present and unchanged.
fn test_viewport_load_after_rebuild_preserves_line_counts() {
    let file_size = LOAD_CHUNK_SIZE * 3;
    let original = make_content(file_size);

    // Keep the temp file alive for the whole test; the buffer reads from it.
    let backing = tempfile::NamedTempFile::new().unwrap();
    std::fs::write(backing.path(), &original).unwrap();
    let mut buf = large_file_buffer_unloaded(backing.path(), original.len());

    let updates = scan_line_feeds(&mut buf);
    buf.rebuild_with_pristine_saved_root(&updates);

    let count_before_load = buf.piece_tree.line_count();
    assert!(
        count_before_load.is_some(),
        "line_count must be Some after rebuild"
    );

    // Offset halfway through the second chunk.
    let window_start = LOAD_CHUNK_SIZE + LOAD_CHUNK_SIZE / 2;
    buf.get_text_range_mut(window_start, 4096).unwrap();

    let count_after_load = buf.piece_tree.line_count();
    assert!(
        count_after_load.is_some(),
        "line_count must still be Some after viewport load \
         (was {:?} before, now {:?})",
        count_before_load,
        count_after_load,
    );
    assert_eq!(
        count_before_load, count_after_load,
        "line_count must not change after viewport load"
    );
}
6949
#[test]
/// Same check as the loaded-buffer efficiency test, but for a buffer backed
/// by an unloaded on-disk file: after one small insertion and a rebuild, the
/// diff must visit fewer nodes than the tree has leaves.
fn test_diff_efficiency_after_rebuild_unloaded() {
    let original = make_content(32 * 1024 * 1024);

    // Keep the temp file alive for the whole test; the buffer reads from it.
    let backing = tempfile::NamedTempFile::new().unwrap();
    std::fs::write(backing.path(), &original).unwrap();
    let mut buf = large_file_buffer_unloaded(backing.path(), original.len());

    let updates = scan_line_feeds(&mut buf);

    buf.insert_bytes(1_000_000, b"HELLO".to_vec());

    buf.rebuild_with_pristine_saved_root(&updates);

    let diff = buf.diff_since_saved();
    assert!(!diff.equal);

    let leaf_total = buf.piece_tree.get_leaves().len();
    let visited = diff.nodes_visited;
    assert!(
        visited < leaf_total,
        "diff visited {} nodes but tree has {} leaves — \
         Arc::ptr_eq short-circuiting is not working (unloaded path)",
        visited,
        leaf_total,
    );
}
6977 }
6978
6979 mod chunked_search {
6980 use super::*;
6981
6982 fn make_buffer(content: &[u8]) -> TextBuffer {
6983 TextBuffer::from_bytes(content.to_vec(), test_fs())
6984 }
6985
6986 fn make_regex(pattern: &str) -> regex::bytes::Regex {
6987 regex::bytes::Regex::new(pattern).unwrap()
6988 }
6989
6990 #[test]
6991 fn single_chunk_line_col_context() {
6992 let mut buf = make_buffer(b"hello world\nfoo bar\nbaz quux\n");
6993 let state = buf.search_scan_all(make_regex("bar"), 100, 3).unwrap();
6994 assert_eq!(state.matches.len(), 1);
6995 let m = &state.matches[0];
6996 assert_eq!(m.line, 2);
6997 assert_eq!(m.column, 5); assert_eq!(m.context, "foo bar");
6999 assert_eq!(m.byte_offset, 16); assert_eq!(m.length, 3);
7001 }
7002
7003 #[test]
7004 fn multiple_matches_correct_lines() {
7005 let mut buf = make_buffer(b"aaa\nbbb\nccc\naaa\n");
7006 let state = buf.search_scan_all(make_regex("aaa"), 100, 3).unwrap();
7007 assert_eq!(state.matches.len(), 2);
7008 assert_eq!(state.matches[0].line, 1);
7009 assert_eq!(state.matches[0].context, "aaa");
7010 assert_eq!(state.matches[1].line, 4);
7011 assert_eq!(state.matches[1].context, "aaa");
7012 }
7013
7014 #[test]
7015 fn match_on_last_line_no_trailing_newline() {
7016 let mut buf = make_buffer(b"line1\nline2\ntarget");
7017 let state = buf.search_scan_all(make_regex("target"), 100, 6).unwrap();
7018 assert_eq!(state.matches.len(), 1);
7019 let m = &state.matches[0];
7020 assert_eq!(m.line, 3);
7021 assert_eq!(m.column, 1);
7022 assert_eq!(m.context, "target");
7023 }
7024
7025 #[test]
7026 fn match_at_first_byte() {
7027 let mut buf = make_buffer(b"target\nother\n");
7028 let state = buf.search_scan_all(make_regex("target"), 100, 6).unwrap();
7029 assert_eq!(state.matches.len(), 1);
7030 let m = &state.matches[0];
7031 assert_eq!(m.line, 1);
7032 assert_eq!(m.column, 1);
7033 assert_eq!(m.byte_offset, 0);
7034 }
7035
7036 #[test]
7037 fn max_matches_caps() {
7038 let mut buf = make_buffer(b"a\na\na\na\na\n");
7039 let state = buf.search_scan_all(make_regex("a"), 3, 1).unwrap();
7040 assert_eq!(state.matches.len(), 3);
7041 assert!(state.capped);
7042 }
7043
7044 #[test]
7045 fn case_insensitive_regex() {
7046 let mut buf = make_buffer(b"Hello\nhello\nHELLO\n");
7047 let state = buf
7048 .search_scan_all(make_regex("(?i)hello"), 100, 5)
7049 .unwrap();
7050 assert_eq!(state.matches.len(), 3);
7051 assert_eq!(state.matches[0].line, 1);
7052 assert_eq!(state.matches[1].line, 2);
7053 assert_eq!(state.matches[2].line, 3);
7054 }
7055
7056 #[test]
7057 fn whole_word_boundary() {
7058 let mut buf = make_buffer(b"foobar\nfoo bar\nfoo\n");
7059 let state = buf.search_scan_all(make_regex(r"\bfoo\b"), 100, 3).unwrap();
7060 assert_eq!(state.matches.len(), 2);
7061 assert_eq!(state.matches[0].line, 2);
7062 assert_eq!(state.matches[0].column, 1);
7063 assert_eq!(state.matches[1].line, 3);
7064 }
7065
7066 #[test]
7070 fn multi_chunk_line_numbers_correct() {
7071 let mut content = Vec::new();
7073 for i in 1..=100 {
7074 content.extend_from_slice(format!("line_{:03}\n", i).as_bytes());
7075 }
7076
7077 let temp_dir = tempfile::TempDir::new().unwrap();
7080 let path = temp_dir.path().join("test.txt");
7081 std::fs::write(&path, &content).unwrap();
7082 let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7083
7084 let state = buffer
7085 .search_scan_all(make_regex("line_050"), 100, 8)
7086 .unwrap();
7087 assert_eq!(state.matches.len(), 1);
7088 let m = &state.matches[0];
7089 assert_eq!(m.line, 50);
7090 assert_eq!(m.column, 1);
7091 assert_eq!(m.context, "line_050");
7092 }
7093
7094 #[test]
7097 fn multi_chunk_no_duplicate_matches() {
7098 let mut content = Vec::new();
7099 for i in 1..=100 {
7100 content.extend_from_slice(format!("word_{:03}\n", i).as_bytes());
7101 }
7102
7103 let temp_dir = tempfile::TempDir::new().unwrap();
7104 let path = temp_dir.path().join("test.txt");
7105 std::fs::write(&path, &content).unwrap();
7106 let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7107
7108 let state = buffer.search_scan_all(make_regex("word_"), 200, 5).unwrap();
7110 assert_eq!(
7111 state.matches.len(),
7112 100,
7113 "Should find exactly 100 matches (one per line), no duplicates"
7114 );
7115
7116 for (i, m) in state.matches.iter().enumerate() {
7118 assert_eq!(
7119 m.line,
7120 i + 1,
7121 "Match {} should be on line {}, got {}",
7122 i,
7123 i + 1,
7124 m.line
7125 );
7126 }
7127 }
7128
7129 #[test]
7133 fn overlap_mid_line_line_numbers() {
7134 let mut content = Vec::new();
7137 content.extend_from_slice(b"short\n");
7138 content.extend_from_slice(b"AAAA_");
7140 for _ in 0..50 {
7141 content.extend_from_slice(b"BBBBBBBBBB"); }
7143 content.extend_from_slice(b"_TARGET_HERE\n");
7144 content.extend_from_slice(b"after\n");
7145
7146 let temp_dir = tempfile::TempDir::new().unwrap();
7147 let path = temp_dir.path().join("test.txt");
7148 std::fs::write(&path, &content).unwrap();
7149 let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7150
7151 let state = buffer
7152 .search_scan_all(make_regex("TARGET_HERE"), 100, 11)
7153 .unwrap();
7154 assert_eq!(state.matches.len(), 1);
7155 let m = &state.matches[0];
7156 assert_eq!(m.line, 2, "TARGET_HERE is on line 2 (the long line)");
7157 assert_eq!(m.length, 11);
7158
7159 let state2 = buffer.search_scan_all(make_regex("after"), 100, 5).unwrap();
7161 assert_eq!(state2.matches.len(), 1);
7162 assert_eq!(state2.matches[0].line, 3);
7163 }
7164
7165 #[test]
7168 fn match_spanning_chunk_boundary() {
7169 let mut content = Vec::new();
7171 content.extend_from_slice(b"line1\n");
7172 for _ in 0..60 {
7174 content.extend_from_slice(b"XXXXXXXXXX"); }
7176 content.extend_from_slice(b"SPLIT\n");
7177 content.extend_from_slice(b"end\n");
7178
7179 let temp_dir = tempfile::TempDir::new().unwrap();
7180 let path = temp_dir.path().join("test.txt");
7181 std::fs::write(&path, &content).unwrap();
7182 let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7183
7184 let state = buffer.search_scan_all(make_regex("SPLIT"), 100, 5).unwrap();
7185 assert_eq!(state.matches.len(), 1, "SPLIT should be found exactly once");
7186 assert_eq!(state.matches[0].line, 2); }
7188
7189 #[test]
7190 fn empty_buffer_no_matches() {
7191 let mut buf = make_buffer(b"");
7192 let state = buf.search_scan_all(make_regex("anything"), 100, 8).unwrap();
7193 assert!(state.matches.is_empty());
7194 assert!(!state.capped);
7195 }
7196
7197 #[test]
7198 fn single_line_no_newline() {
7199 let mut buf = make_buffer(b"hello world");
7200 let state = buf.search_scan_all(make_regex("world"), 100, 5).unwrap();
7201 assert_eq!(state.matches.len(), 1);
7202 let m = &state.matches[0];
7203 assert_eq!(m.line, 1);
7204 assert_eq!(m.column, 7);
7205 assert_eq!(m.context, "hello world");
7206 }
7207
7208 #[test]
7211 fn multiple_matches_same_line() {
7212 let mut buf = make_buffer(b"aa bb aa cc aa\nother\n");
7213 let state = buf.search_scan_all(make_regex("aa"), 100, 2).unwrap();
7214 assert_eq!(state.matches.len(), 3);
7215 for m in &state.matches {
7216 assert_eq!(m.line, 1);
7217 assert_eq!(m.context, "aa bb aa cc aa");
7218 }
7219 assert_eq!(state.matches[0].column, 1);
7220 assert_eq!(state.matches[1].column, 7);
7221 assert_eq!(state.matches[2].column, 13);
7222 }
7223 }
7224
7225 mod hybrid_search {
7226 use super::*;
7227
7228 fn make_regex(pattern: &str) -> regex::bytes::Regex {
7229 regex::bytes::Regex::new(pattern).unwrap()
7230 }
7231
7232 fn make_opts() -> crate::model::filesystem::FileSearchOptions {
7233 crate::model::filesystem::FileSearchOptions {
7234 fixed_string: false,
7235 case_sensitive: true,
7236 whole_word: false,
7237 max_matches: 100,
7238 }
7239 }
7240
7241 #[test]
7244 fn hybrid_matches_scan_all_for_loaded_buffer() {
7245 let content = b"foo bar baz\nfoo again\nlast line\n";
7246 let mut buf = TextBuffer::from_bytes(content.to_vec(), test_fs());
7247 let regex = make_regex("foo");
7248 let opts = make_opts();
7249
7250 let hybrid = buf
7251 .search_hybrid("foo", &opts, regex.clone(), 100, 3)
7252 .unwrap();
7253 let scan = buf.search_scan_all(regex, 100, 3).unwrap();
7254
7255 assert_eq!(hybrid.len(), scan.matches.len());
7256 for (h, s) in hybrid.iter().zip(scan.matches.iter()) {
7257 assert_eq!(h.byte_offset, s.byte_offset);
7258 assert_eq!(h.line, s.line);
7259 assert_eq!(h.column, s.column);
7260 assert_eq!(h.length, s.length);
7261 assert_eq!(h.context, s.context);
7262 }
7263 }
7264
7265 #[test]
7268 fn hybrid_finds_matches_in_unloaded_regions() {
7269 let temp_dir = tempfile::TempDir::new().unwrap();
7270 let path = temp_dir.path().join("big.txt");
7271
7272 let mut content = Vec::new();
7274 for i in 0..100 {
7275 content.extend_from_slice(format!("line {:03}\n", i).as_bytes());
7276 }
7277 std::fs::write(&path, &content).unwrap();
7278
7279 let mut buf = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7281
7282 let leaves = buf.piece_tree.get_leaves();
7284 let has_unloaded = leaves.iter().any(|l| {
7285 matches!(l.location, BufferLocation::Stored(_))
7286 && buf
7287 .buffers
7288 .get(l.location.buffer_id())
7289 .map(|b| !b.is_loaded())
7290 .unwrap_or(false)
7291 });
7292
7293 let regex = make_regex("line 050");
7294 let opts = make_opts();
7295 let matches = buf.search_hybrid("line 050", &opts, regex, 100, 8).unwrap();
7296
7297 assert_eq!(matches.len(), 1);
7298 assert_eq!(matches[0].line, 51); assert!(matches[0].context.contains("line 050"));
7300 if has_unloaded {
7302 }
7304 }
7305
7306 #[test]
7309 fn hybrid_dirty_buffer_finds_all_matches() {
7310 let temp_dir = tempfile::TempDir::new().unwrap();
7311 let path = temp_dir.path().join("dirty.txt");
7312
7313 let mut content = Vec::new();
7314 for i in 0..50 {
7315 content.extend_from_slice(format!("target {:02}\n", i).as_bytes());
7316 }
7317 std::fs::write(&path, &content).unwrap();
7318
7319 let mut buf = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7320
7321 buf.insert(0, "target XX\n");
7323
7324 let regex = make_regex("target");
7325 let opts = make_opts();
7326 let matches = buf.search_hybrid("target", &opts, regex, 200, 6).unwrap();
7327
7328 assert_eq!(matches.len(), 51);
7330 assert!(matches[0].context.contains("target XX"));
7332 }
7333
7334 #[test]
7336 fn hybrid_boundary_match() {
7337 let temp_dir = tempfile::TempDir::new().unwrap();
7338 let path = temp_dir.path().join("boundary.txt");
7339
7340 let content = b"AAAAABBBBB";
7342 std::fs::write(&path, content).unwrap();
7343
7344 let mut buf = TextBuffer::from_bytes(content.to_vec(), test_fs());
7345 buf.rename_file_path(path);
7346
7347 let regex = make_regex("AAAAABBBBB");
7348 let opts = make_opts();
7349 let matches = buf
7350 .search_hybrid("AAAAABBBBB", &opts, regex, 100, 10)
7351 .unwrap();
7352
7353 assert_eq!(matches.len(), 1);
7354 assert_eq!(matches[0].byte_offset, 0);
7355 }
7356
7357 #[test]
7359 fn hybrid_max_matches_respected() {
7360 let content = b"aaa\naaa\naaa\naaa\naaa\n";
7361 let mut buf = TextBuffer::from_bytes(content.to_vec(), test_fs());
7362 let regex = make_regex("aaa");
7363 let opts = crate::model::filesystem::FileSearchOptions {
7364 max_matches: 3,
7365 ..make_opts()
7366 };
7367 let matches = buf.search_hybrid("aaa", &opts, regex, 3, 3).unwrap();
7368 assert!(matches.len() <= 3);
7369 }
7370 }
7371
7372 mod boundary_overlap {
7373 use super::*;
7374
7375 fn make_regex(pattern: &str) -> regex::bytes::Regex {
7376 regex::bytes::Regex::new(pattern).unwrap()
7377 }
7378
7379 #[test]
7380 fn empty_prev_tail_returns_nothing() {
7381 let matches = search_boundary_overlap(b"", b"hello", 0, 1, &make_regex("hello"), 100);
7382 assert!(matches.is_empty());
7383 }
7384
7385 #[test]
7386 fn pure_tail_match_skipped() {
7387 let matches =
7389 search_boundary_overlap(b"foo bar", b" baz", 0, 1, &make_regex("foo"), 100);
7390 assert!(matches.is_empty());
7391 }
7392
7393 #[test]
7394 fn cross_boundary_match_found() {
7395 let matches =
7397 search_boundary_overlap(b"xxSPL", b"ITyy", 0, 1, &make_regex("SPLIT"), 100);
7398 assert_eq!(matches.len(), 1);
7399 assert_eq!(matches[0].byte_offset, 2);
7400 assert_eq!(matches[0].length, 5);
7401 }
7402
7403 #[test]
7404 fn pure_head_match_skipped() {
7405 let matches = search_boundary_overlap(b"foo", b" baz", 0, 1, &make_regex("baz"), 100);
7408 assert!(matches.is_empty());
7409 }
7410
7411 #[test]
7412 fn line_number_tracking() {
7413 let matches =
7417 search_boundary_overlap(b"line1\nSPL", b"IT end", 0, 5, &make_regex("SPLIT"), 100);
7418 assert_eq!(matches.len(), 1);
7419 assert_eq!(matches[0].line, 5);
7420 }
7421
7422 #[test]
7423 fn max_matches_respected() {
7424 let matches = search_boundary_overlap(b"aXb", b"Xc", 0, 1, &make_regex("X"), 1);
7426 assert!(matches.len() <= 1);
7427 }
7428 }
7429}
7430
/// Property-based tests (proptest) for `TextBuffer` editing, offset/position
/// conversion and write-recipe generation, plus example-based tests for
/// binary-content detection.
#[cfg(test)]
mod property_tests {
    use super::*;
    use crate::model::filesystem::StdFileSystem;
    use proptest::prelude::*;
    use std::sync::Arc;

    /// Filesystem handle backed by `StdFileSystem`.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }

    /// Strategy for a single byte: a lowercase ASCII letter or `\n`.
    fn letter_or_newline() -> impl Strategy<Value = u8> {
        prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n'),]
    }

    /// Strategy for 0..100 bytes of lowercase letters and newlines.
    fn text_with_newlines() -> impl Strategy<Value = Vec<u8>> {
        prop::collection::vec(letter_or_newline(), 0..100)
    }

    /// A single randomly generated edit.
    #[derive(Debug, Clone)]
    enum Operation {
        Insert { offset: usize, text: Vec<u8> },
        Delete { offset: usize, bytes: usize },
    }

    /// Strategy for a sequence of 0..50 edits. Offsets are drawn from 0..200
    /// and may therefore exceed the buffer length at the time they are applied.
    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
        let insert = (0usize..200, text_with_newlines())
            .prop_map(|(offset, text)| Operation::Insert { offset, text });
        let delete = (0usize..200, 1usize..50)
            .prop_map(|(offset, bytes)| Operation::Delete { offset, bytes });
        prop::collection::vec(prop_oneof![insert, delete], 0..50)
    }

    /// Applies `op` to `buffer`, clamping insert offsets to the buffer length
    /// but forwarding deletes exactly as generated (possibly out of range).
    fn apply_unclamped(buffer: &mut TextBuffer, op: Operation) {
        match op {
            Operation::Insert { offset, text } => {
                let offset = offset.min(buffer.total_bytes());
                buffer.insert_bytes(offset, text);
            }
            Operation::Delete { offset, bytes } => {
                buffer.delete_bytes(offset, bytes);
            }
        }
    }

    proptest! {
        // line_count is always the number of '\n' bytes plus one.
        #[test]
        fn prop_line_count_consistent(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let newlines = text.iter().filter(|&&b| b == b'\n').count();
            prop_assert_eq!(buffer.line_count(), Some(newlines + 1));
        }

        // Reading everything back yields the bytes the buffer was built from.
        #[test]
        fn prop_get_all_text_matches_original(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // An insert grows the buffer by exactly the inserted length.
        #[test]
        fn prop_insert_increases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let size_before = buffer.total_bytes();

            let at = offset.min(size_before);
            buffer.insert_bytes(at, insert_text.clone());

            prop_assert_eq!(buffer.total_bytes(), size_before + insert_text.len());
        }

        // An in-range delete shrinks the buffer by exactly the deleted length.
        #[test]
        fn prop_delete_decreases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            delete_bytes in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let size_before = buffer.total_bytes();

            let start = offset.min(size_before);
            let count = delete_bytes.min(size_before - start);
            if count == 0 {
                return Ok(());
            }

            buffer.delete_bytes(start, count);

            prop_assert_eq!(buffer.total_bytes(), size_before - count);
        }

        // Inserting and then deleting the same span is a no-op on content.
        #[test]
        fn prop_insert_then_delete_restores_original(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let at = offset.min(buffer.total_bytes());
            buffer.insert_bytes(at, insert_text.clone());
            buffer.delete_bytes(at, insert_text.len());

            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // offset -> position -> offset is the identity for every valid offset.
        #[test]
        fn prop_offset_position_roundtrip(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            for offset in 0..text.len() {
                let position = buffer
                    .offset_to_position(offset)
                    .expect("offset_to_position should succeed for valid offset");
                let roundtripped = buffer.position_to_offset(position);
                prop_assert_eq!(roundtripped, offset, "Failed roundtrip for offset {}", offset);
            }
        }

        // A clamped, non-empty range read equals the same slice of the source.
        #[test]
        fn prop_get_text_range_valid(
            text in text_with_newlines(),
            offset in 0usize..100,
            length in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let start = offset.min(buffer.total_bytes());
            let count = length.min(buffer.total_bytes() - start);
            if count == 0 {
                return Ok(());
            }

            let range = buffer.get_text_range(start, count);
            prop_assert_eq!(range, Some(text[start..start + count].to_vec()));
        }

        // The buffer tracks a plain Vec<u8> model through arbitrary edits.
        #[test]
        fn prop_operations_maintain_consistency(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
            let mut model = b"initial\ntext".to_vec();

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let at = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(at, text.clone());

                        let model_at = at.min(model.len());
                        model.splice(model_at..model_at, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        // Out-of-range deletes are skipped for buffer and model alike.
                        if offset < buffer.total_bytes() {
                            let count = bytes.min(buffer.total_bytes() - offset);
                            buffer.delete_bytes(offset, count);

                            if offset < model.len() {
                                let model_count = count.min(model.len() - offset);
                                model.drain(offset..offset + model_count);
                            }
                        }
                    }
                }
            }

            prop_assert_eq!(buffer.get_all_text().unwrap(), model);
        }

        // Whenever a line count is available it is at least 1.
        #[test]
        fn prop_line_count_never_zero(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                apply_unclamped(&mut buffer, op);
                prop_assert!(buffer.line_count().unwrap_or(1) >= 1);
            }
        }

        // The byte total stays far below 10 MB, which would indicate underflow.
        #[test]
        fn prop_total_bytes_never_negative(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                apply_unclamped(&mut buffer, op);
                prop_assert!(buffer.total_bytes() < 10_000_000);
            }
        }

        // After each edit the midpoint offset converts to a position and back
        // without leaving the buffer.
        #[test]
        fn prop_piece_tree_and_line_index_stay_synced(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());

            for op in operations {
                apply_unclamped(&mut buffer, op);

                if buffer.total_bytes() > 0 {
                    let midpoint = buffer.total_bytes() / 2;
                    if let Some(position) = buffer.offset_to_position(midpoint) {
                        let roundtripped = buffer.position_to_offset(position);
                        prop_assert!(roundtripped <= buffer.total_bytes());
                    }
                }
            }
        }

        // Replaying the write recipe of an unedited buffer yields its content.
        #[test]
        fn prop_write_recipe_matches_content(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            let replayed = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(replayed, text, "Recipe output should match original content");
        }

        // Replaying the write recipe after arbitrary in-range edits yields the
        // buffer's current content.
        #[test]
        fn prop_write_recipe_after_edits(
            initial_text in text_with_newlines(),
            operations in operation_strategy()
        ) {
            let mut buffer = TextBuffer::from_bytes(initial_text, test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let at = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(at, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let count = bytes.min(buffer.total_bytes() - offset);
                            if count > 0 {
                                buffer.delete_bytes(offset, count);
                            }
                        }
                    }
                }
            }

            let expected = buffer.get_all_text().unwrap();
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
            let replayed = apply_recipe(&buffer, &recipe);

            prop_assert_eq!(replayed, expected, "Recipe output should match buffer content after edits");
        }

        // Same as above for a buffer loaded from a real file, where the recipe
        // may contain Copy actions that read back from that file.
        #[test]
        fn prop_write_recipe_copy_ops_valid(
            text in prop::collection::vec(letter_or_newline(), 10..200),
            edit_offset in 0usize..100,
            edit_text in text_with_newlines()
        ) {
            let temp_dir = tempfile::TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");
            std::fs::write(&file_path, &text).unwrap();

            let mut buffer =
                TextBuffer::load_from_file(&file_path, 1024 * 1024, test_fs()).unwrap();

            let at = edit_offset.min(buffer.total_bytes());
            buffer.insert_bytes(at, edit_text.clone());

            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            let expected = buffer.get_all_text().unwrap();
            let replayed = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(replayed, expected, "Recipe with Copy ops should match buffer content");

            if text.len() > 100 && at > 10 {
                // Computed for inspection only; the presence of a Copy action
                // is deliberately not asserted.
                let _has_copy = recipe
                    .actions
                    .iter()
                    .any(|action| matches!(action, RecipeAction::Copy { .. }));
            }
        }
    }

    /// Replays `recipe` into a byte vector: `Copy` actions read the given range
    /// from `recipe.src_path` through the buffer's filesystem, `Insert`
    /// actions append the referenced entry of `recipe.insert_data`.
    ///
    /// Panics if a `Copy` action appears without a source path or if the read
    /// fails.
    fn apply_recipe(buffer: &TextBuffer, recipe: &WriteRecipe) -> Vec<u8> {
        let mut output = Vec::new();
        for action in &recipe.actions {
            match action {
                RecipeAction::Copy { offset, len } => {
                    let src_path = recipe
                        .src_path
                        .as_ref()
                        .expect("Copy action without source path");
                    let data = buffer
                        .fs
                        .read_range(src_path, *offset, *len as usize)
                        .expect("read_range should succeed for Copy op");
                    output.extend_from_slice(&data);
                }
                RecipeAction::Insert { index } => {
                    output.extend_from_slice(&recipe.insert_data[*index]);
                }
            }
        }
        output
    }

    /// Returns the second component of `TextBuffer::detect_encoding_or_binary`
    /// (called with `false` as its second argument), which these tests treat
    /// as the "is binary" flag.
    fn is_detected_as_binary(bytes: &[u8]) -> bool {
        TextBuffer::detect_encoding_or_binary(bytes, false).1
    }

    #[test]
    fn test_detect_binary_text_files() {
        // Plain text, common whitespace, empty input and ANSI escape sequences
        // must all be classified as text.
        let text_samples: [&[u8]; 6] = [
            b"Hello, world!",
            b"Line 1\nLine 2\nLine 3",
            b"Tabs\tand\tnewlines\n",
            b"Carriage return\r\n",
            b"",
            b"\x1b[31mRed text\x1b[0m",
        ];
        for sample in text_samples {
            assert!(!is_detected_as_binary(sample));
        }
    }

    #[test]
    fn test_detect_binary_binary_files() {
        // NUL bytes, low control characters and DEL must be classified as binary.
        let binary_samples: [&[u8]; 5] = [
            b"Hello\x00World",
            b"\x00",
            b"Text with \x01 control char",
            b"\x02\x03\x04",
            b"Text with DEL\x7F",
        ];
        for sample in binary_samples {
            assert!(is_detected_as_binary(sample));
        }
    }

    #[test]
    fn test_detect_binary_png_file() {
        const PNG_SIGNATURE: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        assert!(is_detected_as_binary(&PNG_SIGNATURE));

        // Signature followed by the start of an IHDR chunk.
        let mut png_data = PNG_SIGNATURE.to_vec();
        png_data.extend_from_slice(b"\x00\x00\x00\x0DIHDR");
        assert!(is_detected_as_binary(&png_data));
    }

    #[test]
    fn test_detect_binary_other_image_formats() {
        let jpeg_header: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
        assert!(is_detected_as_binary(jpeg_header));

        // "GIF89a" followed by seven further header bytes.
        let gif_data: &[u8] = &[
            0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
        ];
        assert!(is_detected_as_binary(gif_data));

        let bmp_header: &[u8] = &[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00];
        assert!(is_detected_as_binary(bmp_header));
    }

    #[test]
    fn test_detect_binary_executable_formats() {
        let elf_header: &[u8] = &[0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00];
        assert!(is_detected_as_binary(elf_header));

        let macho_header: &[u8] = &[0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
        assert!(is_detected_as_binary(macho_header));

        let pe_header: &[u8] = &[0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00];
        assert!(is_detected_as_binary(pe_header));
    }
}
7864
/// One line (or line segment) produced by [`TextBufferLineIterator`].
#[derive(Clone, Debug)]
pub struct LineData {
    /// Byte offset in the buffer at which this line starts.
    pub byte_offset: usize,
    /// Line text, lossily decoded as UTF-8, without the trailing `\n`.
    pub content: String,
    /// Whether the line was terminated by `\n` in the buffer.
    pub has_newline: bool,
    /// Line index taken from the buffer's offset-to-position lookup, or `None`
    /// when the buffer has no line count available.
    /// NOTE(review): whether this is 0- or 1-based follows `Position::line` — confirm.
    pub line_number: Option<usize>,
}
7877
7878pub struct TextBufferLineIterator {
7881 lines: Vec<LineData>,
7883 current_index: usize,
7885 pub has_more: bool,
7887}
7888
7889impl TextBufferLineIterator {
7890 pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
7891 let buffer_len = buffer.len();
7892 if byte_pos >= buffer_len {
7893 return Ok(Self {
7894 lines: Vec::new(),
7895 current_index: 0,
7896 has_more: false,
7897 });
7898 }
7899
7900 let has_line_metadata = buffer.line_count().is_some();
7902
7903 let mut current_line = if has_line_metadata {
7906 buffer.offset_to_position(byte_pos).map(|pos| pos.line)
7907 } else {
7908 None
7909 };
7910
7911 let mut lines = Vec::with_capacity(max_lines);
7912 let mut current_offset = byte_pos;
7913 let estimated_line_length = 80; for _ in 0..max_lines {
7917 if current_offset >= buffer_len {
7918 break;
7919 }
7920
7921 let line_start = current_offset;
7922 let line_number = current_line;
7923
7924 let estimated_max_line_length = estimated_line_length * 3;
7926 let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);
7927
7928 let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;
7930
7931 let mut line_len = 0;
7933 let mut found_newline = false;
7934 for &byte in chunk.iter() {
7935 line_len += 1;
7936 if byte == b'\n' {
7937 found_newline = true;
7938 break;
7939 }
7940 }
7941
7942 if !found_newline && current_offset + line_len < buffer_len {
7944 let remaining = buffer_len - current_offset - line_len;
7946 let additional_bytes = estimated_max_line_length.min(remaining);
7947 let more_chunk =
7948 buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;
7949
7950 let mut extended_chunk = chunk;
7951 extended_chunk.extend_from_slice(&more_chunk);
7952
7953 for &byte in more_chunk.iter() {
7954 line_len += 1;
7955 if byte == b'\n' {
7956 found_newline = true;
7957 break;
7958 }
7959 }
7960
7961 let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
7962 let has_newline = line_string.ends_with('\n');
7963 let content = if has_newline {
7964 line_string[..line_string.len() - 1].to_string()
7965 } else {
7966 line_string
7967 };
7968
7969 lines.push(LineData {
7970 byte_offset: line_start,
7971 content,
7972 has_newline,
7973 line_number,
7974 });
7975
7976 current_offset += line_len;
7977 if has_line_metadata && found_newline {
7978 current_line = current_line.map(|n| n + 1);
7979 }
7980 continue;
7981 }
7982
7983 let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
7985 let has_newline = line_string.ends_with('\n');
7986 let content = if has_newline {
7987 line_string[..line_string.len() - 1].to_string()
7988 } else {
7989 line_string
7990 };
7991
7992 lines.push(LineData {
7993 byte_offset: line_start,
7994 content,
7995 has_newline,
7996 line_number,
7997 });
7998
7999 current_offset += line_len;
8000 if has_line_metadata && found_newline {
8002 current_line = current_line.map(|n| n + 1);
8003 }
8004 }
8005
8006 let has_more = current_offset < buffer_len;
8008
8009 Ok(Self {
8010 lines,
8011 current_index: 0,
8012 has_more,
8013 })
8014 }
8015}
8016
8017impl Iterator for TextBufferLineIterator {
8018 type Item = LineData;
8019
8020 fn next(&mut self) -> Option<Self::Item> {
8021 if self.current_index < self.lines.len() {
8022 let line = self.lines[self.current_index].clone();
8023 self.current_index += 1;
8024 Some(line)
8025 } else {
8026 None
8027 }
8028 }
8029}