1use crate::model::encoding;
4use crate::model::filesystem::{
5 FileMetadata, FileSearchCursor, FileSearchOptions, FileSystem, WriteOp,
6};
7use crate::model::piece_tree::{
8 BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, PieceView, Position,
9 StringBuffer, TreeStats,
10};
11use crate::model::piece_tree_diff::PieceTreeDiff;
12use crate::primitives::grapheme;
13use anyhow::{Context, Result};
14use regex::bytes::Regex;
15use std::io::{self, Write};
16use std::ops::Range;
17use std::path::{Path, PathBuf};
18use std::sync::Arc;
19
20pub use encoding::Encoding;
22
/// Error signalling that a save hit a permission wall: the full content has
/// already been staged in `temp_path`, and a sudo-assisted step is needed to
/// move it onto `dest_path`.
#[derive(Debug, Clone, PartialEq)]
pub struct SudoSaveRequired {
    /// Temp file that already contains the complete content to install.
    pub temp_path: PathBuf,
    /// The file the user asked to save.
    pub dest_path: PathBuf,
    /// Owner uid captured from the destination's metadata (0 when unknown).
    pub uid: u32,
    /// Owner gid captured from the destination's metadata (0 when unknown).
    pub gid: u32,
    /// Permission bits captured from the destination's metadata.
    pub mode: u32,
}

impl std::fmt::Display for SudoSaveRequired {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let dest = self.dest_path.display();
        write!(
            f,
            "Permission denied saving to {}. Use sudo to complete the operation.",
            dest
        )
    }
}

// Raised through `anyhow`, so it must implement the std error trait.
impl std::error::Error for SudoSaveRequired {}
52
/// Error asking the user to confirm fully loading a large file whose detected
/// encoding cannot be streamed lazily (see `Encoding::requires_full_file_load`).
#[derive(Debug, Clone, PartialEq)]
pub struct LargeFileEncodingConfirmation {
    /// The file awaiting confirmation.
    pub path: PathBuf,
    /// Size on disk, in bytes.
    pub file_size: usize,
    /// The detected encoding that forces a full in-memory load.
    pub encoding: Encoding,
}

impl std::fmt::Display for LargeFileEncodingConfirmation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Report size in MiB, rounded to whole numbers for the prompt.
        let size_mb = self.file_size as f64 / (1024.0 * 1024.0);
        write!(
            f,
            "{} ({:.0} MB) requires full load. (l)oad, (e)ncoding, (C)ancel? ",
            self.encoding.display_name(),
            size_mb
        )
    }
}

// Raised through `anyhow::bail!`, so it must implement the std error trait.
impl std::error::Error for LargeFileEncodingConfirmation {}
82
/// One unit of work for an incremental scan over piece-tree leaves.
#[derive(Debug, Clone)]
pub struct LineScanChunk {
    /// Index of the piece-tree leaf this chunk covers.
    pub leaf_index: usize,
    /// Length of the chunk in bytes.
    pub byte_len: usize,
    /// True when this leaf's line-feed count is already known, so the scan
    /// can skip re-reading it. (NOTE(review): consumer not visible in this
    /// chunk — confirm against the scanning code.)
    pub already_known: bool,
}
93
94pub use crate::model::filesystem::SearchMatch;
98
/// Incremental state for a chunk-at-a-time regex search over the document,
/// reported via `progress_percent` and consumed until `is_done`.
#[derive(Debug)]
pub struct ChunkedSearchState {
    /// Chunks scheduled for scanning.
    pub chunks: Vec<LineScanChunk>,
    /// Index of the next chunk to scan.
    pub next_chunk: usize,
    /// Document byte offset where the next chunk begins.
    pub next_doc_offset: usize,
    /// Total bytes across all chunks (denominator for progress).
    pub total_bytes: usize,
    /// Bytes scanned so far (numerator for progress).
    pub scanned_bytes: usize,
    /// Compiled pattern, matched against raw bytes.
    pub regex: regex::bytes::Regex,
    /// Matches accumulated so far.
    pub matches: Vec<SearchMatch>,
    /// Trailing bytes carried over from the previous chunk — presumably so
    /// matches spanning a chunk boundary are not missed; verify against the
    /// scanning code.
    pub overlap_tail: Vec<u8>,
    /// Document offset of the start of `overlap_tail`.
    pub overlap_doc_offset: usize,
    /// Cap on the number of matches to collect.
    pub max_matches: usize,
    /// True when the search stopped early because `max_matches` was reached.
    pub capped: bool,
    /// Length of the original query.
    pub query_len: usize,
    // Running line counter carried across chunks while scanning.
    pub(crate) running_line: usize,
}
142
143impl ChunkedSearchState {
144 pub fn is_done(&self) -> bool {
146 self.next_chunk >= self.chunks.len() || self.capped
147 }
148
149 pub fn progress_percent(&self) -> usize {
151 if self.total_bytes > 0 {
152 (self.scanned_bytes * 100) / self.total_bytes
153 } else {
154 100
155 }
156 }
157}
158
/// Files at or above this size (100 MiB) take the large-file loading path.
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// Chunk size (1 MiB) for incremental loading. (NOTE(review): consumers are
/// outside this chunk — confirm usage.)
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// Alignment boundary (64 KiB) for chunk offsets. (NOTE(review): consumers
/// are outside this chunk — confirm usage.)
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
/// Tunable parameters for a text buffer.
#[derive(Debug, Clone)]
pub struct BufferConfig {
    /// Estimated average line length in bytes.
    pub estimated_line_length: usize,
}

impl Default for BufferConfig {
    /// Default estimate: classic 80-column lines.
    fn default() -> Self {
        BufferConfig {
            estimated_line_length: 80,
        }
    }
}
184
/// The newline convention used by a document. Defaults to Unix `LF`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    #[default]
    LF,
    CRLF,
    CR,
}

impl LineEnding {
    /// The literal byte sequence for this line ending.
    pub fn as_str(&self) -> &'static str {
        match self {
            LineEnding::LF => "\n",
            LineEnding::CRLF => "\r\n",
            LineEnding::CR => "\r",
        }
    }

    /// Short human-readable label (e.g. for UI display).
    pub fn display_name(&self) -> &'static str {
        match self {
            LineEnding::LF => "LF",
            LineEnding::CRLF => "CRLF",
            LineEnding::CR => "CR",
        }
    }
}
216
/// A plan for writing the buffer to disk: an ordered list of actions that
/// either copy byte ranges from the original file or insert in-memory data.
struct WriteRecipe {
    /// Source file for `Copy` actions; `None` when everything is inserted.
    src_path: Option<PathBuf>,
    /// Payloads referenced by `Insert` actions, indexed in creation order.
    insert_data: Vec<Vec<u8>>,
    /// The ordered steps that produce the output file.
    actions: Vec<RecipeAction>,
}

/// A single step of a `WriteRecipe`.
#[derive(Debug, Clone, Copy)]
enum RecipeAction {
    /// Copy `len` bytes starting at `offset` from the recipe's source file.
    Copy { offset: u64, len: u64 },
    /// Write `insert_data[index]` verbatim.
    Insert { index: usize },
}
235
236impl WriteRecipe {
237 fn to_write_ops(&self) -> Vec<WriteOp<'_>> {
239 self.actions
240 .iter()
241 .map(|action| match action {
242 RecipeAction::Copy { offset, len } => WriteOp::Copy {
243 offset: *offset,
244 len: *len,
245 },
246 RecipeAction::Insert { index } => WriteOp::Insert {
247 data: &self.insert_data[*index],
248 },
249 })
250 .collect()
251 }
252
253 fn has_copy_ops(&self) -> bool {
255 self.actions
256 .iter()
257 .any(|a| matches!(a, RecipeAction::Copy { .. }))
258 }
259
260 fn flatten_inserts(&self) -> Vec<u8> {
263 let mut result = Vec::new();
264 for action in &self.actions {
265 if let RecipeAction::Insert { index } = action {
266 result.extend_from_slice(&self.insert_data[*index]);
267 }
268 }
269 result
270 }
271}
272
/// A 0-based line number that is either exact or an estimate relative to a
/// previously cached line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    Absolute(usize),
    Relative {
        line: usize,
        from_cached_line: usize,
    },
}

impl LineNumber {
    /// The underlying 0-based line value, regardless of variant.
    pub fn value(&self) -> usize {
        match *self {
            LineNumber::Absolute(line) => line,
            LineNumber::Relative { line, .. } => line,
        }
    }

    /// True for the `Absolute` variant.
    pub fn is_absolute(&self) -> bool {
        matches!(self, Self::Absolute(_))
    }

    /// True for the `Relative` variant.
    pub fn is_relative(&self) -> bool {
        matches!(self, Self::Relative { .. })
    }

    /// Render 1-based for display; relative estimates get a `~` prefix.
    pub fn format(&self) -> String {
        match self {
            Self::Absolute(line) => format!("{}", line + 1),
            Self::Relative { line, .. } => format!("~{}", line + 1),
        }
    }
}
312
/// A piece-tree based text buffer with lazy loading for large files,
/// encoding/line-ending tracking, and save/recovery bookkeeping.
pub struct TextBuffer {
    // Filesystem abstraction used for all I/O (swappable via `set_filesystem`).
    fs: Arc<dyn FileSystem + Send + Sync>,

    // Current document structure: pieces referencing `buffers` below.
    piece_tree: PieceTree,

    // Root of the tree as of the last save (see `mark_saved_snapshot`).
    saved_root: Arc<crate::model::piece_tree::PieceTreeNode>,

    // Backing storage referenced by tree pieces; index = buffer id.
    buffers: Vec<StringBuffer>,

    // Id to assign to the next appended buffer.
    next_buffer_id: usize,

    // Associated file on disk, if any.
    file_path: Option<PathBuf>,

    // True when there are unsaved content changes.
    modified: bool,

    // Set alongside `modified` (see `mark_content_modified`); presumably
    // drives crash-recovery persistence — confirm against the recovery code.
    recovery_pending: bool,

    // True when the buffer took the large-file (lazy) loading path.
    large_file: bool,

    // True once line feeds have been counted for lazily loaded content.
    line_feeds_scanned: bool,

    // True when the content was loaded without encoding conversion.
    is_binary: bool,

    // Line ending currently in effect for the document.
    line_ending: LineEnding,

    // Line ending detected at load time / adopted at last save; a mismatch
    // with `line_ending` forces conversion on save (see `build_write_recipe`).
    original_line_ending: LineEnding,

    // Encoding currently in effect for the document.
    encoding: Encoding,

    // Encoding detected at load time / adopted at last save; a mismatch with
    // `encoding` forces conversion on save.
    original_encoding: Encoding,

    // On-disk size recorded at load/save time; `None` for unsaved buffers.
    saved_file_size: Option<usize>,

    // Wrapping counter bumped on every content change.
    version: u64,

    // Tunables (e.g. estimated line length).
    config: BufferConfig,
}
384
/// A clone of the buffer's editable state: the piece tree plus the backing
/// buffers it references. (NOTE(review): restore path not visible in this
/// chunk — confirm intended use.)
#[derive(Debug, Clone)]
pub struct BufferSnapshot {
    pub piece_tree: PieceTree,
    pub buffers: Vec<StringBuffer>,
    pub next_buffer_id: usize,
}
396
397impl TextBuffer {
398 pub fn new(_large_file_threshold: usize, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
401 let piece_tree = PieceTree::empty();
402 let line_ending = LineEnding::default();
403 let encoding = Encoding::default();
404 TextBuffer {
405 fs,
406 saved_root: piece_tree.root(),
407 piece_tree,
408 buffers: vec![StringBuffer::new(0, Vec::new())],
409 next_buffer_id: 1,
410 file_path: None,
411 modified: false,
412 recovery_pending: false,
413 large_file: false,
414 line_feeds_scanned: false,
415 is_binary: false,
416 line_ending,
417 original_line_ending: line_ending,
418 encoding,
419 original_encoding: encoding,
420 saved_file_size: None,
421 version: 0,
422 config: BufferConfig::default(),
423 }
424 }
425
426 pub fn new_with_path(
429 large_file_threshold: usize,
430 fs: Arc<dyn FileSystem + Send + Sync>,
431 path: PathBuf,
432 ) -> Self {
433 let mut buffer = Self::new(large_file_threshold, fs);
434 buffer.file_path = Some(path);
435 buffer
436 }
437
    /// Wrapping counter that changes whenever the content is modified.
    pub fn version(&self) -> u64 {
        self.version
    }

    /// The filesystem implementation backing this buffer.
    pub fn filesystem(&self) -> &Arc<dyn FileSystem + Send + Sync> {
        &self.fs
    }

    /// Replace the filesystem implementation.
    pub fn set_filesystem(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
        self.fs = fs;
    }

    // Advance the version counter; wrapping add so overflow is harmless.
    #[inline]
    fn bump_version(&mut self) {
        self.version = self.version.wrapping_add(1);
    }

    // Flag the buffer dirty for both save state and recovery, and bump the
    // version in one step.
    #[inline]
    fn mark_content_modified(&mut self) {
        self.modified = true;
        self.recovery_pending = true;
        self.bump_version();
    }
464
465 fn from_bytes_raw(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
468 let bytes = content.len();
469
470 let line_ending = Self::detect_line_ending(&content);
472
473 let buffer = StringBuffer::new(0, content);
475 let line_feed_cnt = buffer.line_feed_count();
476
477 let piece_tree = if bytes > 0 {
478 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
479 } else {
480 PieceTree::empty()
481 };
482
483 let saved_root = piece_tree.root();
484
485 TextBuffer {
486 fs,
487 line_ending,
488 original_line_ending: line_ending,
489 encoding: Encoding::Utf8, original_encoding: Encoding::Utf8,
491 piece_tree,
492 saved_root,
493 buffers: vec![buffer],
494 next_buffer_id: 1,
495 file_path: None,
496 modified: false,
497 recovery_pending: false,
498 large_file: false,
499 line_feeds_scanned: false,
500 is_binary: true,
501 saved_file_size: Some(bytes),
502 version: 0,
503 config: BufferConfig::default(),
504 }
505 }
506
507 pub fn from_bytes(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
509 let (encoding, utf8_content) = Self::detect_and_convert_encoding(&content);
511
512 let bytes = utf8_content.len();
513
514 let line_ending = Self::detect_line_ending(&utf8_content);
516
517 let buffer = StringBuffer::new(0, utf8_content);
519 let line_feed_cnt = buffer.line_feed_count();
520
521 let piece_tree = if bytes > 0 {
522 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
523 } else {
524 PieceTree::empty()
525 };
526
527 let saved_root = piece_tree.root();
528
529 TextBuffer {
530 fs,
531 line_ending,
532 original_line_ending: line_ending,
533 encoding,
534 original_encoding: encoding,
535 piece_tree,
536 saved_root,
537 buffers: vec![buffer],
538 next_buffer_id: 1,
539 file_path: None,
540 modified: false,
541 recovery_pending: false,
542 large_file: false,
543 line_feeds_scanned: false,
544 is_binary: false,
545 saved_file_size: Some(bytes), version: 0,
547 config: BufferConfig::default(),
548 }
549 }
550
551 pub fn from_bytes_with_encoding(
553 content: Vec<u8>,
554 encoding: Encoding,
555 fs: Arc<dyn FileSystem + Send + Sync>,
556 ) -> Self {
557 let utf8_content = encoding::convert_to_utf8(&content, encoding);
559
560 let bytes = utf8_content.len();
561
562 let line_ending = Self::detect_line_ending(&utf8_content);
564
565 let buffer = StringBuffer::new(0, utf8_content);
567 let line_feed_cnt = buffer.line_feed_count();
568
569 let piece_tree = if bytes > 0 {
570 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
571 } else {
572 PieceTree::empty()
573 };
574
575 let saved_root = piece_tree.root();
576
577 TextBuffer {
578 fs,
579 line_ending,
580 original_line_ending: line_ending,
581 encoding,
582 original_encoding: encoding,
583 piece_tree,
584 saved_root,
585 buffers: vec![buffer],
586 next_buffer_id: 1,
587 file_path: None,
588 modified: false,
589 recovery_pending: false,
590 large_file: false,
591 line_feeds_scanned: false,
592 is_binary: false,
593 saved_file_size: Some(bytes),
594 version: 0,
595 config: BufferConfig::default(),
596 }
597 }
598
599 pub fn from_str(
601 s: &str,
602 _large_file_threshold: usize,
603 fs: Arc<dyn FileSystem + Send + Sync>,
604 ) -> Self {
605 Self::from_bytes(s.as_bytes().to_vec(), fs)
606 }
607
608 pub fn empty(fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
610 let piece_tree = PieceTree::empty();
611 let saved_root = piece_tree.root();
612 let line_ending = LineEnding::default();
613 let encoding = Encoding::default();
614 TextBuffer {
615 fs,
616 piece_tree,
617 saved_root,
618 buffers: vec![StringBuffer::new(0, Vec::new())],
619 next_buffer_id: 1,
620 file_path: None,
621 modified: false,
622 recovery_pending: false,
623 large_file: false,
624 line_feeds_scanned: false,
625 is_binary: false,
626 line_ending,
627 original_line_ending: line_ending,
628 encoding,
629 original_encoding: encoding,
630 saved_file_size: None,
631 version: 0,
632 config: BufferConfig::default(),
633 }
634 }
635
636 pub fn load_from_file<P: AsRef<Path>>(
638 path: P,
639 large_file_threshold: usize,
640 fs: Arc<dyn FileSystem + Send + Sync>,
641 ) -> anyhow::Result<Self> {
642 let path = path.as_ref();
643
644 let metadata = fs.metadata(path)?;
646 let file_size = metadata.size as usize;
647
648 let threshold = if large_file_threshold > 0 {
650 large_file_threshold
651 } else {
652 DEFAULT_LARGE_FILE_THRESHOLD
653 };
654
655 if file_size >= threshold {
657 Self::load_large_file(path, file_size, fs)
658 } else {
659 Self::load_small_file(path, fs)
660 }
661 }
662
663 pub fn load_from_file_with_encoding<P: AsRef<Path>>(
665 path: P,
666 encoding: Encoding,
667 fs: Arc<dyn FileSystem + Send + Sync>,
668 config: BufferConfig,
669 ) -> anyhow::Result<Self> {
670 let path = path.as_ref();
671 let contents = fs.read_file(path)?;
672
673 let mut buffer = Self::from_bytes_with_encoding(contents, encoding, fs);
674 buffer.file_path = Some(path.to_path_buf());
675 buffer.modified = false;
676 buffer.config = config;
677 Ok(buffer)
678 }
679
680 fn load_small_file(path: &Path, fs: Arc<dyn FileSystem + Send + Sync>) -> anyhow::Result<Self> {
682 let contents = fs.read_file(path)?;
683
684 let (encoding, is_binary) = Self::detect_encoding_or_binary(&contents, false);
686
687 let mut buffer = if is_binary {
689 Self::from_bytes_raw(contents, fs)
690 } else {
691 Self::from_bytes(contents, fs)
693 };
694 buffer.file_path = Some(path.to_path_buf());
695 buffer.modified = false;
696 buffer.large_file = false;
697 buffer.is_binary = is_binary;
698 if is_binary {
700 buffer.encoding = encoding;
701 buffer.original_encoding = encoding;
702 }
703 Ok(buffer)
705 }
706
707 pub fn check_large_file_encoding(
716 path: impl AsRef<Path>,
717 fs: Arc<dyn FileSystem + Send + Sync>,
718 ) -> anyhow::Result<Option<LargeFileEncodingConfirmation>> {
719 let path = path.as_ref();
720 let metadata = fs.metadata(path)?;
721 let file_size = metadata.size as usize;
722
723 if file_size < DEFAULT_LARGE_FILE_THRESHOLD {
725 return Ok(None);
726 }
727
728 let sample_size = file_size.min(8 * 1024);
730 let sample = fs.read_range(path, 0, sample_size)?;
731 let (encoding, is_binary) =
732 Self::detect_encoding_or_binary(&sample, file_size > sample_size);
733
734 if is_binary {
736 return Ok(None);
737 }
738
739 if encoding.requires_full_file_load() {
741 return Ok(Some(LargeFileEncodingConfirmation {
742 path: path.to_path_buf(),
743 file_size,
744 encoding,
745 }));
746 }
747
748 Ok(None)
749 }
750
    // Large-file load without prior user confirmation; bails with
    // `LargeFileEncodingConfirmation` when a full load would be required.
    fn load_large_file(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> anyhow::Result<Self> {
        Self::load_large_file_internal(path, file_size, fs, false)
    }

    /// Large-file load after the user confirmed a full in-memory load
    /// (`force_full_load = true`).
    pub fn load_large_file_confirmed(
        path: impl AsRef<Path>,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> anyhow::Result<Self> {
        let path = path.as_ref();
        let metadata = fs.metadata(path)?;
        let file_size = metadata.size as usize;
        Self::load_large_file_internal(path, file_size, fs, true)
    }
776
    // Shared implementation for the large-file loading paths. Strategy, based
    // on a small header sample:
    //   - binary content: read the whole file, no encoding conversion;
    //   - encoding that requires a full load: bail with a confirmation error
    //     unless `force_full_load` is set;
    //   - other non-UTF-8 text: load fully and convert;
    //   - UTF-8/ASCII text: set up a lazy `Unloaded` buffer backed by the file.
    fn load_large_file_internal(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
        force_full_load: bool,
    ) -> anyhow::Result<Self> {
        use crate::model::piece_tree::{BufferData, BufferLocation};

        // Sniff encoding/binary-ness from at most the first 8 KiB.
        let sample_size = file_size.min(8 * 1024);
        let sample = fs.read_range(path, 0, sample_size)?;

        let (encoding, is_binary) =
            Self::detect_encoding_or_binary(&sample, file_size > sample_size);

        if is_binary {
            tracing::info!("Large binary file detected, loading without encoding conversion");
            let contents = fs.read_file(path)?;
            let mut buffer = Self::from_bytes_raw(contents, fs);
            buffer.file_path = Some(path.to_path_buf());
            buffer.modified = false;
            buffer.large_file = true;
            // Preserve the sniffed encoding over `from_bytes_raw`'s default.
            buffer.encoding = encoding;
            buffer.original_encoding = encoding;
            return Ok(buffer);
        }

        let requires_full_load = encoding.requires_full_file_load();

        // Ask the caller to confirm before pulling a huge file into memory.
        if requires_full_load && !force_full_load {
            anyhow::bail!(LargeFileEncodingConfirmation {
                path: path.to_path_buf(),
                file_size,
                encoding,
            });
        }

        if !matches!(encoding, Encoding::Utf8 | Encoding::Ascii) {
            tracing::info!(
                "Large file with non-UTF-8 encoding ({:?}), loading fully for conversion",
                encoding
            );
            let contents = fs.read_file(path)?;
            let mut buffer = Self::from_bytes(contents, fs);
            buffer.file_path = Some(path.to_path_buf());
            buffer.modified = false;
            buffer.large_file = true; buffer.is_binary = is_binary;
            return Ok(buffer);
        }

        // Lazy path: line endings are detected from the sample only; the
        // content itself stays on disk.
        let line_ending = Self::detect_line_ending(&sample);

        let buffer = StringBuffer {
            id: 0,
            data: BufferData::Unloaded {
                file_path: path.to_path_buf(),
                file_offset: 0,
                bytes: file_size,
            },
            stored_file_offset: None,
        };

        // Line-feed count is unknown (None) until a scan runs.
        let piece_tree = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        let saved_root = piece_tree.root();

        tracing::debug!(
            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
            file_size,
            file_size
        );

        Ok(TextBuffer {
            fs,
            piece_tree,
            saved_root,
            buffers: vec![buffer],
            next_buffer_id: 1,
            file_path: Some(path.to_path_buf()),
            modified: false,
            recovery_pending: false,
            large_file: true,
            line_feeds_scanned: false,
            is_binary,
            line_ending,
            original_line_ending: line_ending,
            encoding,
            original_encoding: encoding,
            saved_file_size: Some(file_size),
            version: 0,
            config: BufferConfig::default(),
        })
    }
886
887 pub fn save(&mut self) -> anyhow::Result<()> {
889 if let Some(path) = &self.file_path {
890 self.save_to_file(path.clone())
891 } else {
892 anyhow::bail!(io::Error::new(
893 io::ErrorKind::NotFound,
894 "No file path associated with buffer",
895 ))
896 }
897 }
898
    // True when this process does not own `dest_path`; such files are written
    // in place instead of being replaced (see `save_to_file`).
    fn should_use_inplace_write(&self, dest_path: &Path) -> bool {
        !self.fs.is_owner(dest_path)
    }
908
    // Plan the on-disk representation of the current document as a sequence
    // of Copy (reuse bytes from the original file) and Insert (in-memory
    // data) actions. Copying is only possible when no line-ending or
    // encoding conversion is needed, because conversion changes the bytes.
    fn build_write_recipe(&self) -> io::Result<WriteRecipe> {
        let total = self.total_bytes();

        let needs_line_ending_conversion = self.line_ending != self.original_line_ending;
        // Text is stored as UTF-8 in memory, so any target encoding other
        // than UTF-8/ASCII (or a changed encoding) requires conversion.
        let needs_encoding_conversion = !self.is_binary
            && (self.encoding != self.original_encoding
                || !matches!(self.encoding, Encoding::Utf8 | Encoding::Ascii));
        let needs_conversion = needs_line_ending_conversion || needs_encoding_conversion;

        // Copy source: the existing file, but only when its bytes can be
        // reused verbatim.
        let src_path_for_copy: Option<&Path> = if needs_conversion {
            None
        } else {
            self.file_path.as_deref().filter(|p| self.fs.exists(p))
        };
        let target_ending = self.line_ending;
        let target_encoding = self.encoding;

        let mut insert_data: Vec<Vec<u8>> = Vec::new();
        let mut actions: Vec<RecipeAction> = Vec::new();

        // Emit the BOM first when the target encoding defines one.
        if let Some(bom) = target_encoding.bom_bytes() {
            insert_data.push(bom.to_vec());
            actions.push(RecipeAction::Insert { index: 0 });
        }

        for piece_view in self.piece_tree.iter_pieces_in_range(0, total) {
            let buffer_id = piece_view.location.buffer_id();
            let buffer = self.buffers.get(buffer_id).ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("Buffer {} not found", buffer_id),
                )
            })?;

            match &buffer.data {
                // Content still on disk: either copy the range directly or
                // read + convert it into an Insert.
                BufferData::Unloaded {
                    file_path,
                    file_offset,
                    ..
                } => {
                    // Copy only from the original stored buffer of the same
                    // source file.
                    let can_copy = matches!(piece_view.location, BufferLocation::Stored(_))
                        && src_path_for_copy.is_some_and(|src| file_path == src);

                    if can_copy {
                        let src_offset = (*file_offset + piece_view.buffer_offset) as u64;
                        actions.push(RecipeAction::Copy {
                            offset: src_offset,
                            len: piece_view.bytes as u64,
                        });
                        continue;
                    }

                    let data = self.fs.read_range(
                        file_path,
                        (*file_offset + piece_view.buffer_offset) as u64,
                        piece_view.bytes,
                    )?;

                    let data = if needs_line_ending_conversion {
                        Self::convert_line_endings_to(&data, target_ending)
                    } else {
                        data
                    };

                    let data = if needs_encoding_conversion {
                        Self::convert_to_encoding(&data, target_encoding)
                    } else {
                        data
                    };

                    let index = insert_data.len();
                    insert_data.push(data);
                    actions.push(RecipeAction::Insert { index });
                }

                // In-memory content: slice the piece's range and convert as
                // needed.
                BufferData::Loaded { data, .. } => {
                    let start = piece_view.buffer_offset;
                    let end = start + piece_view.bytes;
                    let chunk = &data[start..end];

                    let chunk = if needs_line_ending_conversion {
                        Self::convert_line_endings_to(chunk, target_ending)
                    } else {
                        chunk.to_vec()
                    };

                    let chunk = if needs_encoding_conversion {
                        Self::convert_to_encoding(&chunk, target_encoding)
                    } else {
                        chunk
                    };

                    let index = insert_data.len();
                    insert_data.push(chunk);
                    actions.push(RecipeAction::Insert { index });
                }
            }
        }

        Ok(WriteRecipe {
            src_path: src_path_for_copy.map(|p| p.to_path_buf()),
            insert_data,
            actions,
        })
    }
1045
1046 fn create_temp_file(
1052 &self,
1053 dest_path: &Path,
1054 ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
1055 let same_dir_temp = self.fs.temp_path_for(dest_path);
1057 match self.fs.create_file(&same_dir_temp) {
1058 Ok(file) => Ok((same_dir_temp, file)),
1059 Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
1060 let temp_path = self.fs.unique_temp_path(dest_path);
1062 let file = self.fs.create_file(&temp_path)?;
1063 Ok((temp_path, file))
1064 }
1065 Err(e) => Err(e),
1066 }
1067 }
1068
1069 fn create_recovery_temp_file(
1072 &self,
1073 dest_path: &Path,
1074 ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
1075 let recovery_dir = crate::input::input_history::get_data_dir()
1077 .map(|d| d.join("recovery"))
1078 .unwrap_or_else(|_| std::env::temp_dir());
1079
1080 self.fs.create_dir_all(&recovery_dir)?;
1082
1083 let file_name = dest_path
1085 .file_name()
1086 .unwrap_or_else(|| std::ffi::OsStr::new("fresh-save"));
1087 let timestamp = std::time::SystemTime::now()
1088 .duration_since(std::time::UNIX_EPOCH)
1089 .map(|d| d.as_nanos())
1090 .unwrap_or(0);
1091 let pid = std::process::id();
1092
1093 let temp_name = format!(
1094 ".inplace-{}-{}-{}.tmp",
1095 file_name.to_string_lossy(),
1096 pid,
1097 timestamp
1098 );
1099 let temp_path = recovery_dir.join(temp_name);
1100
1101 let file = self.fs.create_file(&temp_path)?;
1102 Ok((temp_path, file))
1103 }
1104
1105 fn inplace_recovery_meta_path(&self, dest_path: &Path) -> PathBuf {
1108 let recovery_dir = crate::input::input_history::get_data_dir()
1109 .map(|d| d.join("recovery"))
1110 .unwrap_or_else(|_| std::env::temp_dir());
1111
1112 let hash = crate::services::recovery::path_hash(dest_path);
1113 recovery_dir.join(format!("{}.inplace.json", hash))
1114 }
1115
    // Persist a JSON sidecar describing an in-progress in-place write so a
    // crash mid-write can be repaired from the staged temp copy.
    fn write_inplace_recovery_meta(
        &self,
        meta_path: &Path,
        dest_path: &Path,
        temp_path: &Path,
        original_metadata: &Option<FileMetadata>,
    ) -> io::Result<()> {
        // Capture ownership/permissions to restore; 0/0/0o644 as fallbacks
        // when unknown.
        #[cfg(unix)]
        let (uid, gid, mode) = original_metadata
            .as_ref()
            .map(|m| {
                (
                    m.uid.unwrap_or(0),
                    m.gid.unwrap_or(0),
                    m.permissions.as_ref().map(|p| p.mode()).unwrap_or(0o644),
                )
            })
            .unwrap_or((0, 0, 0o644));
        // Non-unix platforms have no uid/gid; use neutral defaults.
        #[cfg(not(unix))]
        let (uid, gid, mode) = (0u32, 0u32, 0o644u32);

        let recovery = crate::services::recovery::InplaceWriteRecovery::new(
            dest_path.to_path_buf(),
            temp_path.to_path_buf(),
            uid,
            gid,
            mode,
        );

        let json = serde_json::to_string_pretty(&recovery)
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        self.fs.write_file(meta_path, json.as_bytes())
    }
1152
    /// Save the buffer to `path`, choosing a strategy: truncate for empty
    /// content; in-place write for local files we don't own; a plain full
    /// write when the recipe has no copy actions; otherwise a patched write
    /// that reuses unchanged ranges from the original file. On
    /// `PermissionDenied` (local path) the content is staged in a temp file
    /// and a `SudoSaveRequired` error is returned.
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Empty buffer: just write an empty file.
        if total == 0 {
            self.fs.write_file(dest_path, &[])?;
            self.finalize_save(dest_path)?;
            return Ok(());
        }

        let recipe = self.build_write_recipe()?;
        let ops = recipe.to_write_ops();

        let is_local = self.fs.remote_connection_info().is_none();
        let use_inplace = is_local && self.should_use_inplace_write(dest_path);

        if use_inplace {
            self.save_with_inplace_write(dest_path, &recipe)?;
        } else if !recipe.has_copy_ops() && !is_local {
            // Remote target, pure inserts: ship the flattened bytes.
            let data = recipe.flatten_inserts();
            self.fs.write_file(dest_path, &data)?;
        } else if is_local {
            let write_result = if !recipe.has_copy_ops() {
                let data = recipe.flatten_inserts();
                self.fs.write_file(dest_path, &data)
            } else {
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
                self.fs.write_patched(src_for_patch, dest_path, &ops)
            };

            if let Err(e) = write_result {
                if e.kind() == io::ErrorKind::PermissionDenied {
                    // Stage the full content so a sudo step can install it.
                    let original_metadata = self.fs.metadata_if_exists(dest_path);
                    let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
                    self.write_recipe_to_file(&mut temp_file, &recipe)?;
                    temp_file.sync_all()?;
                    drop(temp_file);
                    return Err(self.make_sudo_error(temp_path, dest_path, original_metadata));
                }
                return Err(e.into());
            }
        } else {
            // Remote target that needs copy ops: patched write remotely.
            let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
            self.fs.write_patched(src_for_patch, dest_path, &ops)?;
        }

        self.finalize_save(dest_path)?;
        Ok(())
    }
1225
    // Write `recipe` into the existing destination file rather than
    // replacing it. When the recipe copies ranges from the original file,
    // the complete new content is first staged in a recovery temp file
    // (plus a JSON sidecar), since overwriting the destination destroys the
    // copy source.
    fn save_with_inplace_write(
        &self,
        dest_path: &Path,
        recipe: &WriteRecipe,
    ) -> anyhow::Result<()> {
        let original_metadata = self.fs.metadata_if_exists(dest_path);

        // Pure-insert recipes need no staging: write directly.
        if !recipe.has_copy_ops() {
            let data = recipe.flatten_inserts();
            return self.write_data_inplace(dest_path, &data, original_metadata);
        }

        // Stage the complete new content before touching the destination.
        let (temp_path, mut temp_file) = self.create_recovery_temp_file(dest_path)?;
        if let Err(e) = self.write_recipe_to_file(&mut temp_file, recipe) {
            // Best-effort cleanup of the partial temp file.
            #[allow(clippy::let_underscore_must_use)]
            let _ = self.fs.remove_file(&temp_path);
            return Err(e.into());
        }
        temp_file.sync_all()?;
        drop(temp_file);

        // Best-effort sidecar: a crash between here and the final write can
        // be repaired from the staged copy.
        let recovery_meta_path = self.inplace_recovery_meta_path(dest_path);
        #[allow(clippy::let_underscore_must_use)]
        let _ = self.write_inplace_recovery_meta(
            &recovery_meta_path,
            dest_path,
            &temp_path,
            &original_metadata,
        );

        match self.fs.open_file_for_write(dest_path) {
            Ok(mut out_file) => {
                if let Err(e) = self.stream_file_to_writer(&temp_path, &mut out_file) {
                    return Err(e.into());
                }
                out_file.sync_all()?;
                // Success: the staged copy and sidecar are no longer needed.
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&temp_path);
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Ok(())
            }
            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                // Keep the temp file: the sudo flow will move it into place.
                #[allow(clippy::let_underscore_must_use)]
                let _ = self.fs.remove_file(&recovery_meta_path);
                Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
            }
            Err(e) => {
                Err(e.into())
            }
        }
    }
1306
1307 fn write_data_inplace(
1309 &self,
1310 dest_path: &Path,
1311 data: &[u8],
1312 original_metadata: Option<FileMetadata>,
1313 ) -> anyhow::Result<()> {
1314 match self.fs.open_file_for_write(dest_path) {
1315 Ok(mut out_file) => {
1316 out_file.write_all(data)?;
1317 out_file.sync_all()?;
1318 Ok(())
1319 }
1320 Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
1321 let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
1323 temp_file.write_all(data)?;
1324 temp_file.sync_all()?;
1325 drop(temp_file);
1326 Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
1327 }
1328 Err(e) => Err(e.into()),
1329 }
1330 }
1331
1332 fn stream_file_to_writer(
1334 &self,
1335 src_path: &Path,
1336 out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1337 ) -> io::Result<()> {
1338 const CHUNK_SIZE: usize = 1024 * 1024; let file_size = self.fs.metadata(src_path)?.size;
1341 let mut offset = 0u64;
1342
1343 while offset < file_size {
1344 let remaining = file_size - offset;
1345 let chunk_len = std::cmp::min(remaining, CHUNK_SIZE as u64) as usize;
1346 let chunk = self.fs.read_range(src_path, offset, chunk_len)?;
1347 out_file.write_all(&chunk)?;
1348 offset += chunk_len as u64;
1349 }
1350
1351 Ok(())
1352 }
1353
1354 fn write_recipe_to_file(
1356 &self,
1357 out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
1358 recipe: &WriteRecipe,
1359 ) -> io::Result<()> {
1360 for action in &recipe.actions {
1361 match action {
1362 RecipeAction::Copy { offset, len } => {
1363 let src_path = recipe.src_path.as_ref().ok_or_else(|| {
1365 io::Error::new(io::ErrorKind::InvalidData, "Copy action without source")
1366 })?;
1367 let data = self.fs.read_range(src_path, *offset, *len as usize)?;
1368 out_file.write_all(&data)?;
1369 }
1370 RecipeAction::Insert { index } => {
1371 out_file.write_all(&recipe.insert_data[*index])?;
1372 }
1373 }
1374 }
1375 Ok(())
1376 }
1377
    // Post-save bookkeeping: refresh the recorded size and path, consolidate
    // the piece tree, snapshot the saved state, and adopt the current line
    // ending/encoding as the new "original".
    fn finalize_save(&mut self, dest_path: &Path) -> anyhow::Result<()> {
        let new_size = self.fs.metadata(dest_path)?.size as usize;
        tracing::debug!(
            "Buffer::save: updating saved_file_size from {:?} to {}",
            self.saved_file_size,
            new_size
        );
        self.saved_file_size = Some(new_size);
        self.file_path = Some(dest_path.to_path_buf());

        self.consolidate_after_save(dest_path, new_size);

        self.mark_saved_snapshot();
        self.original_line_ending = self.line_ending;
        self.original_encoding = self.encoding;
        Ok(())
    }
1398
1399 pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
1403 let new_size = self.fs.metadata(&dest_path)?.size as usize;
1404 self.saved_file_size = Some(new_size);
1405 self.file_path = Some(dest_path.clone());
1406
1407 self.consolidate_after_save(&dest_path, new_size);
1409
1410 self.mark_saved_snapshot();
1411 self.original_line_ending = self.line_ending;
1412 self.original_encoding = self.encoding;
1413 Ok(())
1414 }
1415
    // After a save the on-disk file matches the document, so collapse the
    // piece tree into a single piece; the strategy differs for lazily
    // loaded (large) vs fully loaded buffers.
    fn consolidate_after_save(&mut self, path: &Path, file_size: usize) {
        if self.large_file {
            self.consolidate_large_file(path, file_size);
        } else {
            self.consolidate_small_file();
        }
    }
1426
    // Rebuild the tree as a single `Unloaded` piece referencing the freshly
    // written file, preserving the known line-feed count when one exists.
    fn consolidate_large_file(&mut self, path: &Path, file_size: usize) {
        // line_count() appears to include the final implicit line, hence the
        // `- 1` to recover the line-feed count — consistent with
        // consolidate_small_file counting raw b'\n' bytes.
        let preserved_lf = if self.line_feeds_scanned {
            self.piece_tree.line_count().map(|c| c.saturating_sub(1))
        } else {
            None
        };

        let buffer = StringBuffer {
            id: 0,
            data: BufferData::Unloaded {
                file_path: path.to_path_buf(),
                file_offset: 0,
                bytes: file_size,
            },
            stored_file_offset: None,
        };

        self.piece_tree = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, preserved_lf)
        } else {
            PieceTree::empty()
        };

        self.buffers = vec![buffer];
        self.next_buffer_id = 1;

        tracing::debug!(
            "Buffer::consolidate_large_file: consolidated into single piece of {} bytes",
            file_size
        );
    }
1462
1463 fn consolidate_small_file(&mut self) {
1465 if let Some(bytes) = self.get_all_text() {
1466 let line_feed_cnt = bytes.iter().filter(|&&b| b == b'\n').count();
1467 let len = bytes.len();
1468
1469 let buffer = StringBuffer::new_loaded(0, bytes, true);
1471
1472 self.piece_tree = if len > 0 {
1473 PieceTree::new(BufferLocation::Stored(0), 0, len, Some(line_feed_cnt))
1474 } else {
1475 PieceTree::empty()
1476 };
1477
1478 self.buffers = vec![buffer];
1479 self.next_buffer_id = 1;
1480
1481 tracing::debug!(
1482 "Buffer::consolidate_small_file: consolidated into single loaded buffer of {} bytes",
1483 len
1484 );
1485 }
1486 }
1487
    // Build the `SudoSaveRequired` error carrying the staged temp file and
    // the destination's ownership/mode bits (unix only; zeros elsewhere or
    // when the metadata is unavailable).
    fn make_sudo_error(
        &self,
        temp_path: PathBuf,
        dest_path: &Path,
        original_metadata: Option<FileMetadata>,
    ) -> anyhow::Error {
        #[cfg(unix)]
        let (uid, gid, mode) = if let Some(ref meta) = original_metadata {
            (
                meta.uid.unwrap_or(0),
                meta.gid.unwrap_or(0),
                // Mask to the permission bits only (no file-type bits).
                meta.permissions
                    .as_ref()
                    .map(|p| p.mode() & 0o7777)
                    .unwrap_or(0),
            )
        } else {
            (0, 0, 0)
        };
        #[cfg(not(unix))]
        let (uid, gid, mode) = (0u32, 0u32, 0u32);

        // Consume the metadata explicitly so non-unix builds don't warn
        // about an unused parameter.
        let _ = original_metadata; anyhow::anyhow!(SudoSaveRequired {
            temp_path,
            dest_path: dest_path.to_path_buf(),
            uid,
            gid,
            mode,
        })
    }
1521
    /// Total document length in bytes.
    pub fn total_bytes(&self) -> usize {
        self.piece_tree.total_bytes()
    }

    /// Number of lines in the document, or `None` when line-feed counts are
    /// not (yet) fully known.
    pub fn line_count(&self) -> Option<usize> {
        self.piece_tree.line_count()
    }
1533
    /// Record the current piece tree as the saved state and clear the
    /// modified flag; subsequent diffs compare against this root.
    pub fn mark_saved_snapshot(&mut self) {
        self.saved_root = self.piece_tree.root();
        self.modified = false;
    }

    /// Re-point `saved_root` at the current tree, but only while the buffer
    /// is unmodified — keeps the `Arc::ptr_eq` fast path in
    /// `diff_since_saved` working after content-neutral tree changes
    /// (e.g. chunk loads).
    pub fn refresh_saved_root_if_unmodified(&mut self) {
        if !self.modified {
            self.saved_root = self.piece_tree.root();
        }
    }
1549
    /// Mirror a chunk-load buffer substitution into `saved_root`.
    ///
    /// When a chunk of an unloaded buffer is materialized, the live tree's
    /// leaves are repointed at the new chunk buffer. A modified buffer's
    /// `saved_root` is a separate tree that still references the old buffer;
    /// rewrite its overlapping leaves the same way so later diffs compare
    /// like against like.
    ///
    /// `chunk_offset_in_buffer` / `chunk_bytes` locate the chunk inside the
    /// OLD buffer; `new_buffer_id` identifies the freshly loaded chunk.
    fn apply_chunk_load_to_saved_root(
        &mut self,
        old_buffer_id: usize,
        chunk_offset_in_buffer: usize,
        chunk_bytes: usize,
        new_buffer_id: usize,
    ) {
        use crate::model::piece_tree::{LeafData, PieceTree};

        let mut leaves = Vec::new();
        self.saved_root.collect_leaves(&mut leaves);

        let mut modified = false;
        // Each overlapping leaf may split into up to three pieces.
        let mut new_leaves: Vec<LeafData> = Vec::with_capacity(leaves.len() + 2);

        for leaf in &leaves {
            if leaf.location.buffer_id() != old_buffer_id {
                new_leaves.push(*leaf);
                continue;
            }

            // All of the following are offsets within the OLD buffer.
            let leaf_start = leaf.offset;
            let leaf_end = leaf.offset + leaf.bytes;
            let chunk_start = chunk_offset_in_buffer;
            let chunk_end = chunk_offset_in_buffer + chunk_bytes;

            // No overlap with the loaded chunk: keep the leaf unchanged.
            if chunk_start >= leaf_end || chunk_end <= leaf_start {
                new_leaves.push(*leaf);
                continue;
            }

            modified = true;

            // Head: the part before the chunk stays on the old buffer.
            if chunk_start > leaf_start {
                new_leaves.push(LeafData::new(
                    leaf.location,
                    leaf.offset,
                    chunk_start - leaf_start,
                    None,
                ));
            }

            // Middle: the overlapping part moves to the new chunk buffer,
            // rebased to chunk-local offsets.
            let actual_start = chunk_start.max(leaf_start);
            let actual_end = chunk_end.min(leaf_end);
            let offset_in_chunk = actual_start - chunk_start;
            new_leaves.push(LeafData::new(
                BufferLocation::Added(new_buffer_id),
                offset_in_chunk,
                actual_end - actual_start,
                None,
            ));

            // Tail: the part after the chunk stays on the old buffer.
            if chunk_end < leaf_end {
                new_leaves.push(LeafData::new(
                    leaf.location,
                    chunk_end,
                    leaf_end - chunk_end,
                    None,
                ));
            }
        }

        if modified {
            self.saved_root = PieceTree::from_leaves(&new_leaves).root();
        }
    }
1628
    /// Diff the current tree against the last saved snapshot.
    ///
    /// Cheap-to-expensive cascade:
    /// 1. unmodified flag → trivially equal;
    /// 2. `Arc::ptr_eq` on the roots → equal;
    /// 3. structural tree diff;
    /// 4. for small structural diffs (≤ 64 KiB changed) byte-compare the
    ///    reported ranges, filtering out structure-only false positives
    ///    (trees that differ in shape but hold identical content).
    pub fn diff_since_saved(&self) -> PieceTreeDiff {
        let _span = tracing::info_span!(
            "diff_since_saved",
            large_file = self.large_file,
            modified = self.modified,
            lf_scanned = self.line_feeds_scanned
        )
        .entered();

        // Fast path 1: nothing was ever modified.
        if !self.modified {
            tracing::trace!("diff_since_saved: not modified → equal");
            return PieceTreeDiff {
                equal: true,
                byte_ranges: Vec::new(),
                nodes_visited: 0,
            };
        }

        // Fast path 2: same root Arc means literally the same tree.
        if Arc::ptr_eq(&self.saved_root, &self.piece_tree.root()) {
            tracing::trace!("diff_since_saved: Arc::ptr_eq fast path → equal");
            return PieceTreeDiff {
                equal: true,
                byte_ranges: Vec::new(),
                nodes_visited: 0,
            };
        }

        let structure_diff = self.diff_trees_by_structure();

        if structure_diff.equal {
            tracing::trace!("diff_since_saved: structure equal");
            return structure_diff;
        }

        let total_changed_bytes: usize = structure_diff
            .byte_ranges
            .iter()
            .map(|r| r.end.saturating_sub(r.start))
            .sum();

        // Content verification is linear in changed bytes; cap it so large
        // edits skip a byte-compare that would almost surely report "differs"
        // anyway.
        const MAX_VERIFY_BYTES: usize = 64 * 1024;
        if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
            if self.verify_content_differs_in_ranges(&structure_diff.byte_ranges) {
                tracing::trace!(
                    "diff_since_saved: content differs, byte_ranges={}",
                    structure_diff.byte_ranges.len(),
                );
                return structure_diff;
            } else {
                // Structure changed but bytes are identical → report equal.
                return PieceTreeDiff {
                    equal: true,
                    byte_ranges: Vec::new(),
                    nodes_visited: structure_diff.nodes_visited,
                };
            }
        }

        tracing::info!(
            "diff_since_saved: large change, byte_ranges={}, nodes_visited={}",
            structure_diff.byte_ranges.len(),
            structure_diff.nodes_visited
        );
        structure_diff
    }
1724
1725 fn verify_content_differs_in_ranges(&self, byte_ranges: &[std::ops::Range<usize>]) -> bool {
1728 let saved_bytes = self.tree_total_bytes(&self.saved_root);
1729 let current_bytes = self.piece_tree.total_bytes();
1730
1731 if saved_bytes != current_bytes {
1733 return true;
1734 }
1735
1736 for range in byte_ranges {
1738 if range.start >= range.end {
1739 continue;
1740 }
1741
1742 let saved_slice =
1744 self.extract_range_from_tree(&self.saved_root, range.start, range.end);
1745 let current_slice = self.get_text_range(range.start, range.end);
1747
1748 match (saved_slice, current_slice) {
1749 (Some(saved), Some(current)) => {
1750 if saved != current {
1751 return true; }
1753 }
1754 _ => {
1755 return true;
1757 }
1758 }
1759 }
1760
1761 false
1763 }
1764
1765 fn extract_range_from_tree(
1767 &self,
1768 root: &Arc<crate::model::piece_tree::PieceTreeNode>,
1769 start: usize,
1770 end: usize,
1771 ) -> Option<Vec<u8>> {
1772 let mut result = Vec::with_capacity(end.saturating_sub(start));
1773 self.collect_range_from_node(root, start, end, 0, &mut result)?;
1774 Some(result)
1775 }
1776
    /// Recursively copy the document byte range `[range_start, range_end)`
    /// from the subtree rooted at `node` into `result`.
    ///
    /// `node_offset` is the document offset of the subtree's first byte.
    /// Returns `None` if any required buffer or slice is unavailable, which
    /// aborts the whole extraction.
    fn collect_range_from_node(
        &self,
        node: &Arc<crate::model::piece_tree::PieceTreeNode>,
        range_start: usize,
        range_end: usize,
        node_offset: usize,
        result: &mut Vec<u8>,
    ) -> Option<()> {
        use crate::model::piece_tree::PieceTreeNode;

        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Document offset at which the right subtree begins.
                let left_end = node_offset + left_bytes;

                if range_start < left_end {
                    self.collect_range_from_node(
                        left,
                        range_start,
                        range_end,
                        node_offset,
                        result,
                    )?;
                }

                if range_end > left_end {
                    self.collect_range_from_node(right, range_start, range_end, left_end, result)?;
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                ..
            } => {
                let node_end = node_offset + bytes;

                // Only touch the leaf when it overlaps the requested range.
                if range_start < node_end && range_end > node_offset {
                    let buf = self.buffers.get(location.buffer_id())?;
                    let data = buf.get_data()?;

                    // Clamp the request to this leaf, leaf-local coordinates.
                    let leaf_start = range_start.saturating_sub(node_offset);
                    let leaf_end = (range_end - node_offset).min(*bytes);

                    if leaf_start < leaf_end {
                        let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
                        result.extend_from_slice(slice);
                    }
                }
            }
        }
        Some(())
    }
1839
1840 fn tree_total_bytes(&self, root: &Arc<crate::model::piece_tree::PieceTreeNode>) -> usize {
1842 use crate::model::piece_tree::PieceTreeNode;
1843 match root.as_ref() {
1844 PieceTreeNode::Internal {
1845 left_bytes, right, ..
1846 } => left_bytes + self.tree_total_bytes(right),
1847 PieceTreeNode::Leaf { bytes, .. } => *bytes,
1848 }
1849 }
1850
    /// Structural diff between `saved_root` and the live tree (no content
    /// comparison).
    fn diff_trees_by_structure(&self) -> PieceTreeDiff {
        crate::model::piece_tree_diff::diff_piece_trees(&self.saved_root, &self.piece_tree.root())
    }

    /// Convert a byte offset into a line/column `Position`; `None` when the
    /// offset cannot be resolved.
    pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
        self.piece_tree
            .offset_to_position(offset, &self.buffers)
            .map(|(line, column)| Position { line, column })
    }

    /// Convert a line/column `Position` into a document byte offset.
    pub fn position_to_offset(&self, position: Position) -> usize {
        self.piece_tree
            .position_to_offset(position.line, position.column, &self.buffers)
    }
1868
1869 pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
1871 if text.is_empty() {
1872 return self.piece_tree.cursor_at_offset(offset);
1873 }
1874
1875 self.mark_content_modified();
1877
1878 let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
1880
1881 let (buffer_location, buffer_offset, text_len) =
1883 if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
1884 append_info
1885 } else {
1886 let buffer_id = self.next_buffer_id;
1888 self.next_buffer_id += 1;
1889 let buffer = StringBuffer::new(buffer_id, text.clone());
1890 self.buffers.push(buffer);
1891 (BufferLocation::Added(buffer_id), 0, text.len())
1892 };
1893
1894 if self.line_feeds_scanned {
1897 self.ensure_chunk_loaded_at(offset);
1898 }
1899
1900 self.piece_tree.insert(
1902 offset,
1903 buffer_location,
1904 buffer_offset,
1905 text_len,
1906 line_feed_cnt,
1907 &self.buffers,
1908 )
1909 }
1910
    /// Append fast path for `insert_bytes`: if `offset` falls immediately
    /// after a piece whose bytes end exactly at the end of an `Added`
    /// buffer, append `text` to that buffer in place and return its
    /// `(location, append_offset, len)`.
    ///
    /// Returns `None` (caller allocates a new buffer) when:
    /// - text is empty or the insertion is at offset 0;
    /// - the preceding byte is not the last byte of its piece;
    /// - the piece lives in a `Stored` (file-backed) buffer;
    /// - the piece does not own the buffer's current tail (appending would
    ///   corrupt bytes referenced by other pieces).
    fn try_append_to_existing_buffer(
        &mut self,
        offset: usize,
        text: &[u8],
    ) -> Option<(BufferLocation, usize, usize)> {
        if text.is_empty() || offset == 0 {
            return None;
        }

        // Piece containing the byte just before the insertion point.
        let piece_info = self.piece_tree.find_by_offset(offset - 1)?;

        // Insertion point must be exactly the end of that piece.
        let offset_in_piece = piece_info.offset_in_piece?;
        if offset_in_piece + 1 != piece_info.bytes {
            return None;
        }

        // Only in-memory `Added` buffers can grow.
        if !matches!(piece_info.location, BufferLocation::Added(_)) {
            return None;
        }

        let buffer_id = piece_info.location.buffer_id();
        let buffer = self.buffers.get_mut(buffer_id)?;

        let buffer_len = buffer.get_data()?.len();

        // The piece must end at the buffer's end.
        if piece_info.offset + piece_info.bytes != buffer_len {
            return None;
        }

        let append_offset = buffer.append(text);

        Some((piece_info.location, append_offset, text.len()))
    }
1956
    /// Insert a UTF-8 string at byte `offset`; convenience wrapper over
    /// [`Self::insert_bytes`] that discards the returned cursor.
    pub fn insert(&mut self, offset: usize, text: &str) {
        self.insert_bytes(offset, text.as_bytes().to_vec());
    }
1961
1962 pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
1965 if text.is_empty() {
1966 let offset = self.position_to_offset(position);
1967 return self.piece_tree.cursor_at_offset(offset);
1968 }
1969
1970 self.mark_content_modified();
1971
1972 let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1974
1975 let buffer_id = self.next_buffer_id;
1977 self.next_buffer_id += 1;
1978 let buffer = StringBuffer::new(buffer_id, text.clone());
1979 self.buffers.push(buffer);
1980
1981 self.piece_tree.insert_at_position(
1983 position.line,
1984 position.column,
1985 BufferLocation::Added(buffer_id),
1986 0,
1987 text.len(),
1988 line_feed_cnt,
1989 &self.buffers,
1990 )
1991 }
1992
1993 pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
1995 if bytes == 0 || offset >= self.total_bytes() {
1996 return;
1997 }
1998
1999 if self.line_feeds_scanned {
2002 self.ensure_chunk_loaded_at(offset);
2003 let end = (offset + bytes).min(self.total_bytes());
2004 if end > offset {
2005 self.ensure_chunk_loaded_at(end.saturating_sub(1));
2006 }
2007 }
2008
2009 self.piece_tree.delete(offset, bytes, &self.buffers);
2011
2012 self.mark_content_modified();
2013 }
2014
2015 pub fn delete(&mut self, range: Range<usize>) {
2017 if range.end > range.start {
2018 self.delete_bytes(range.start, range.end - range.start);
2019 }
2020 }
2021
    /// Delete the content between `start` and `end` line/column positions;
    /// exact boundary semantics are those of
    /// `PieceTree::delete_position_range`. Marks the content modified.
    pub fn delete_range(&mut self, start: Position, end: Position) {
        self.piece_tree.delete_position_range(
            start.line,
            start.column,
            end.line,
            end.column,
            &self.buffers,
        );
        self.mark_content_modified();
    }
2035
2036 pub fn replace_content(&mut self, new_content: &str) {
2043 let bytes = new_content.len();
2044 let content_bytes = new_content.as_bytes().to_vec();
2045
2046 let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();
2048
2049 let buffer_id = self.next_buffer_id;
2051 self.next_buffer_id += 1;
2052 let buffer = StringBuffer::new(buffer_id, content_bytes);
2053 self.buffers.push(buffer);
2054
2055 if bytes > 0 {
2057 self.piece_tree = PieceTree::new(
2058 BufferLocation::Added(buffer_id),
2059 0,
2060 bytes,
2061 Some(line_feed_cnt),
2062 );
2063 } else {
2064 self.piece_tree = PieceTree::empty();
2065 }
2066
2067 self.mark_content_modified();
2068 }
2069
    /// Replace the live editing state (piece tree, buffers, id counter) with
    /// the contents of a previously captured snapshot, marking the content
    /// modified.
    pub fn restore_buffer_state(&mut self, snapshot: &BufferSnapshot) {
        self.piece_tree = snapshot.piece_tree.clone();
        self.buffers = snapshot.buffers.clone();
        self.next_buffer_id = snapshot.next_buffer_id;
        self.mark_content_modified();
    }

    /// Capture the current editing state for later restoration via
    /// [`Self::restore_buffer_state`].
    pub fn snapshot_buffer_state(&self) -> Arc<BufferSnapshot> {
        Arc::new(BufferSnapshot {
            piece_tree: self.piece_tree.clone(),
            buffers: self.buffers.clone(),
            next_buffer_id: self.next_buffer_id,
        })
    }
2094
    /// Apply a batch of `(offset, delete_bytes, insert_text)` edits in one
    /// piece-tree pass; returns the net signed change in document length.
    ///
    /// Buffers for all non-empty insert texts are created up front, in edit
    /// order; the piece tree then pulls the matching descriptors back via
    /// the callback. This relies on `PieceTree::apply_bulk_edits` invoking
    /// the callback exactly once per non-empty text, in the same order.
    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
        // One (location, buffer_offset, byte_len, line_feed_count) entry per
        // non-empty insertion text.
        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();

        for (_, _, text) in edits {
            if !text.is_empty() {
                let buffer_id = self.next_buffer_id;
                self.next_buffer_id += 1;
                let content = text.as_bytes().to_vec();
                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
                let bytes = content.len();
                let buffer = StringBuffer::new(buffer_id, content);
                self.buffers.push(buffer);
                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
            }
        }

        // Hand descriptors back in creation order.
        let mut idx = 0;
        let delta = self
            .piece_tree
            .apply_bulk_edits(edits, &self.buffers, |_text| {
                let info = buffer_info[idx];
                idx += 1;
                info
            });

        self.mark_content_modified();
        delta
    }
2131
    /// Read up to `bytes` bytes starting at document offset `offset` without
    /// mutating anything (no chunk loading).
    ///
    /// Returns `None` if a touched buffer's data is unavailable (unloaded).
    /// Pieces whose computed buffer range falls out of bounds are silently
    /// skipped, so the result may be shorter than requested.
    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
        if bytes == 0 {
            return Some(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = offset + bytes;
        let mut collected = 0;

        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
            let buffer_id = piece_view.location.buffer_id();
            if let Some(buffer) = self.buffers.get(buffer_id) {
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip the request to the part covered by this piece.
                let read_start = offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // `None` means the buffer is unloaded → abort the read.
                    let data = buffer.get_data()?;

                    if buffer_end <= data.len() {
                        result.extend_from_slice(&data[buffer_start..buffer_end]);
                        collected += bytes_to_read;

                        if collected >= bytes {
                            break;
                        }
                    }
                }
            }
        }

        Some(result)
    }
2182
    /// Read `bytes` bytes starting at `offset`, loading (and, for large
    /// pieces, chunk-splitting) any unloaded buffers along the way.
    ///
    /// Loading a chunk restructures the piece tree, which invalidates the
    /// in-flight piece iterator; the outer loop restarts iteration from the
    /// current read position whenever that happens. Errors when an iteration
    /// makes no progress (corrupt tree or unreadable data) rather than
    /// spinning forever.
    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
        let _span = tracing::info_span!("get_text_range_mut", offset, bytes).entered();
        if bytes == 0 {
            return Ok(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = (offset + bytes).min(self.len());
        let mut current_offset = offset;
        let mut iteration_count = 0u32;

        while current_offset < end_offset {
            iteration_count += 1;
            let mut made_progress = false;
            let mut restarted_iteration = false;

            for piece_view in self
                .piece_tree
                .iter_pieces_in_range(current_offset, end_offset)
            {
                let buffer_id = piece_view.location.buffer_id();

                let needs_loading = self
                    .buffers
                    .get(buffer_id)
                    .map(|b| !b.is_loaded())
                    .unwrap_or(false);

                // A `true` return means the tree was restructured: break out
                // and restart iteration at `current_offset`.
                if needs_loading && self.chunk_split_and_load(&piece_view, current_offset)? {
                    restarted_iteration = true;
                    break;
                }

                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clip the request to this piece's document span.
                let read_start = current_offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
                    let data = buffer
                        .get_data()
                        .context("Buffer data unavailable after load")?;

                    anyhow::ensure!(
                        buffer_end <= data.len(),
                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
                        buffer_start,
                        buffer_end,
                        data.len()
                    );

                    result.extend_from_slice(&data[buffer_start..buffer_end]);
                    current_offset = read_end;
                    made_progress = true;
                }
            }

            // Neither read any bytes nor restarted after a load: bail.
            if !made_progress && !restarted_iteration {
                tracing::error!(
                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
                tracing::error!(
                    "Piece tree stats: {} total bytes",
                    self.piece_tree.stats().total_bytes
                );
                anyhow::bail!(
                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
            }
        }

        if iteration_count > 1 {
            tracing::info!(
                iteration_count,
                result_len = result.len(),
                "get_text_range_mut: completed with multiple iterations"
            );
        }

        Ok(result)
    }
2295
2296 pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
2309 let _span = tracing::info_span!("prepare_viewport", start_offset, line_count).entered();
2310 let estimated_bytes = line_count.saturating_mul(200);
2313
2314 let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
2316 let bytes_to_load = estimated_bytes.min(remaining_bytes);
2317 tracing::trace!(
2318 bytes_to_load,
2319 total_bytes = self.total_bytes(),
2320 "prepare_viewport loading"
2321 );
2322
2323 self.get_text_range_mut(start_offset, bytes_to_load)?;
2326
2327 Ok(())
2328 }
2329
    /// Load the data behind `piece_view`, splitting it into bounded chunks
    /// first when it is large.
    ///
    /// Returns `Ok(true)` when the piece tree was restructured (the caller
    /// must restart piece iteration) and `Ok(false)` when the buffer was
    /// simply loaded in place without tree changes.
    fn chunk_split_and_load(
        &mut self,
        piece_view: &PieceView,
        current_offset: usize,
    ) -> Result<bool> {
        let buffer_id = piece_view.location.buffer_id();

        // Split when the piece itself is big, or when the piece covers only
        // part of a larger unloaded buffer (loading all of it would waste
        // memory).
        let buffer_bytes = self
            .buffers
            .get(buffer_id)
            .and_then(|b| b.unloaded_bytes())
            .unwrap_or(0);
        let needs_chunk_split =
            piece_view.bytes > LOAD_CHUNK_SIZE || buffer_bytes > piece_view.bytes;

        tracing::info!(
            buffer_id,
            piece_bytes = piece_view.bytes,
            buffer_bytes,
            needs_chunk_split,
            piece_doc_offset = piece_view.doc_offset,
            current_offset,
            "chunk_split_and_load: loading unloaded piece"
        );

        // Small, fully-covered buffer: load it whole, no tree change.
        if !needs_chunk_split {
            let _span = tracing::info_span!(
                "load_small_buffer",
                piece_bytes = piece_view.bytes,
                buffer_id,
            )
            .entered();
            self.buffers
                .get_mut(buffer_id)
                .context("Buffer not found")?
                .load(&*self.fs)
                .context("Failed to load buffer")?;
            return Ok(false);
        }

        let _span = tracing::info_span!(
            "chunk_split_and_load",
            piece_bytes = piece_view.bytes,
            buffer_id,
        )
        .entered();

        let piece_start_in_doc = piece_view.doc_offset;
        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

        // Pick the chunk to load: the whole piece when small enough,
        // otherwise an alignment-rounded LOAD_CHUNK_SIZE window around the
        // current read offset.
        let (chunk_start_in_buffer, chunk_bytes) = if piece_view.bytes <= LOAD_CHUNK_SIZE {
            (piece_view.buffer_offset, piece_view.bytes)
        } else {
            let start =
                (piece_view.buffer_offset + offset_in_piece) / CHUNK_ALIGNMENT * CHUNK_ALIGNMENT;
            let bytes = LOAD_CHUNK_SIZE
                .min((piece_view.buffer_offset + piece_view.bytes).saturating_sub(start));
            (start, bytes)
        };

        let chunk_start_offset_in_piece =
            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
        let split_end_in_doc = split_start_in_doc + chunk_bytes;

        // Carve the chunk's document span out of the piece so only that
        // span gets repointed at the new buffer.
        if chunk_start_offset_in_piece > 0 {
            self.piece_tree
                .split_at_offset(split_start_in_doc, &self.buffers);
        }
        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
            self.piece_tree
                .split_at_offset(split_end_in_doc, &self.buffers);
        }

        let chunk_buffer = self
            .buffers
            .get(buffer_id)
            .context("Buffer not found")?
            .create_chunk_buffer(self.next_buffer_id, chunk_start_in_buffer, chunk_bytes)
            .context("Failed to create chunk buffer")?;

        self.next_buffer_id += 1;
        let new_buffer_id = chunk_buffer.id;
        self.buffers.push(chunk_buffer);

        // Re-point every piece covering the chunk at the new buffer.
        self.piece_tree.replace_buffer_reference(
            buffer_id,
            piece_view.buffer_offset + chunk_start_offset_in_piece,
            chunk_bytes,
            BufferLocation::Added(new_buffer_id),
        );

        self.buffers
            .get_mut(new_buffer_id)
            .context("Chunk buffer not found")?
            .load(&*self.fs)
            .context("Failed to load chunk")?;

        // Splitting can leave leaves with unknown line-feed counts; if the
        // document's counts were complete, restore that invariant now.
        if self.line_feeds_scanned {
            let leaves = self.piece_tree.get_leaves();
            let mut fixups: Vec<(usize, usize)> = Vec::new();
            for (idx, leaf) in leaves.iter().enumerate() {
                if leaf.line_feed_cnt.is_none() {
                    if let Ok(count) = self.scan_leaf(leaf) {
                        fixups.push((idx, count));
                    }
                }
            }
            if !fixups.is_empty() {
                self.piece_tree.update_leaf_line_feeds_path_copy(&fixups);
            }
        }

        // Keep the saved snapshot in sync: chunk loading is content-neutral,
        // so an unmodified buffer can simply adopt the new root, while a
        // modified one needs the same buffer substitution applied to its
        // saved tree.
        if !self.modified {
            self.saved_root = self.piece_tree.root();
        } else {
            self.apply_chunk_load_to_saved_root(
                buffer_id,
                chunk_start_in_buffer,
                chunk_bytes,
                new_buffer_id,
            );
        }

        Ok(true)
    }
2486
    /// Entire document as raw bytes; `None` if any backing data is missing.
    pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
        self.get_text_range(0, self.total_bytes())
    }

    /// Entire document as a `String` (invalid UTF-8 replaced lossily).
    pub(crate) fn get_all_text_string(&self) -> Option<String> {
        self.get_all_text()
            .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
    }

    /// Bytes in `range`; empty when the data is unavailable.
    pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
        self.get_text_range(range.start, range.end.saturating_sub(range.start))
            .unwrap_or_default()
    }

    /// Whole document as a `String`, or `None` if data is unavailable.
    /// NOTE(review): this shadows the `ToString` naming convention with an
    /// `Option` return; a rename would avoid confusion with `Display`.
    pub fn to_string(&self) -> Option<String> {
        self.get_all_text_string()
    }

    /// Document length in bytes.
    pub fn len(&self) -> usize {
        self.total_bytes()
    }

    /// Whether the document contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.total_bytes() == 0
    }
2526
    /// Path of the backing file, if any.
    pub fn file_path(&self) -> Option<&Path> {
        self.file_path.as_deref()
    }

    /// Point the buffer at a new backing file path (does not touch disk).
    pub fn rename_file_path(&mut self, path: PathBuf) {
        self.file_path = Some(path);
    }

    /// Detach the buffer from its backing file path.
    pub fn clear_file_path(&mut self) {
        self.file_path = None;
    }
2543
    /// Append a file's newly grown tail as an unloaded piece.
    ///
    /// Used while streaming in a file that is still growing: bytes
    /// `[old_size, new_size)` of `source_path` are mapped lazily through a
    /// new unloaded buffer. No-op when `new_size` does not exceed the
    /// current document size. The new piece's line-feed count is unknown
    /// (`None`) until scanned.
    pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
        let old_size = self.total_bytes();
        if new_size <= old_size {
            return;
        }

        let additional_bytes = new_size - old_size;

        let buffer_id = self.next_buffer_id;
        self.next_buffer_id += 1;

        // Unloaded buffer mapping the tail of the file on disk.
        let new_buffer = StringBuffer::new_unloaded(
            buffer_id,
            source_path.to_path_buf(),
            old_size,
            additional_bytes,
        );
        self.buffers.push(new_buffer);

        self.piece_tree.insert(
            old_size,
            BufferLocation::Stored(buffer_id),
            0,
            additional_bytes,
            None, // line feeds unknown until scanned
            &self.buffers,
        );
    }
2577
    /// Whether the content differs from the last saved state.
    pub fn is_modified(&self) -> bool {
        self.modified
    }

    /// Clear the modified flag (content considered saved).
    pub fn clear_modified(&mut self) {
        self.modified = false;
    }

    /// Force the modified flag to a specific value.
    pub fn set_modified(&mut self, modified: bool) {
        self.modified = modified;
    }

    /// Whether a recovery decision is still pending for this buffer.
    pub fn is_recovery_pending(&self) -> bool {
        self.recovery_pending
    }

    /// Set or clear the pending-recovery flag.
    pub fn set_recovery_pending(&mut self, pending: bool) {
        self.recovery_pending = pending;
    }
2603
2604 fn ensure_chunk_loaded_at(&mut self, offset: usize) {
2610 if let Some(piece_info) = self.piece_tree.find_by_offset(offset) {
2611 let buffer_id = piece_info.location.buffer_id();
2612 if let Some(buffer) = self.buffers.get_mut(buffer_id) {
2613 if !buffer.is_loaded() {
2614 let buf_bytes = buffer.unloaded_bytes().unwrap_or(0);
2615 tracing::info!(
2616 "ensure_chunk_loaded_at: loading buffer {} ({} bytes) for offset {}",
2617 buffer_id,
2618 buf_bytes,
2619 offset
2620 );
2621 if let Err(e) = buffer.load(&*self.fs) {
2622 tracing::warn!("Failed to load chunk at offset {offset}: {e}");
2623 }
2624 }
2625 }
2626 }
2627 }
2628
    /// Whether this buffer was opened in large-file (lazy-loading) mode.
    pub fn is_large_file(&self) -> bool {
        self.large_file
    }

    /// Whether a complete line-feed scan has run, making line data exact.
    pub fn has_line_feed_scan(&self) -> bool {
        self.line_feeds_scanned
    }

    /// Snapshot of the piece tree's leaves (for scanning/diagnostics).
    pub fn piece_tree_leaves(&self) -> Vec<crate::model::piece_tree::LeafData> {
        self.piece_tree.get_leaves()
    }
2644
2645 pub fn prepare_line_scan(&mut self) -> (Vec<LineScanChunk>, usize) {
2654 self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
2656
2657 let leaves = self.piece_tree.get_leaves();
2658 let total_bytes: usize = leaves.iter().map(|l| l.bytes).sum();
2659 let mut chunks = Vec::new();
2660
2661 for (idx, leaf) in leaves.iter().enumerate() {
2662 chunks.push(LineScanChunk {
2663 leaf_index: idx,
2664 byte_len: leaf.bytes,
2665 already_known: leaf.line_feed_cnt.is_some(),
2666 });
2667 }
2668
2669 (chunks, total_bytes)
2670 }
2671
    /// Initialize state for a chunked regex search over the whole document.
    ///
    /// Chunks come from `prepare_line_scan` (leaves split to chunk size).
    /// `max_matches` caps the collected matches; `query_len` sizes the
    /// overlap kept between chunks so boundary-spanning matches are found.
    /// Drive the scan with `search_scan_next_chunk`.
    pub fn search_scan_init(
        &mut self,
        regex: regex::bytes::Regex,
        max_matches: usize,
        query_len: usize,
    ) -> ChunkedSearchState {
        let (chunks, total_bytes) = self.prepare_line_scan();
        ChunkedSearchState {
            chunks,
            next_chunk: 0,
            next_doc_offset: 0,
            total_bytes,
            scanned_bytes: 0,
            regex,
            matches: Vec::new(),
            overlap_tail: Vec::new(),
            overlap_doc_offset: 0,
            max_matches,
            capped: false,
            query_len,
            running_line: 1, // line numbers are 1-based
        }
    }
2700
    /// Process one chunk of an in-progress chunked search.
    ///
    /// Loads the chunk's bytes (faulting them in if needed), prepends the
    /// previous chunk's overlap tail so matches spanning the boundary are
    /// found exactly once, records matches up to `max_matches`, and updates
    /// the running line/offset bookkeeping. Returns `Ok(true)` while more
    /// chunks remain.
    pub fn search_scan_next_chunk(
        &mut self,
        state: &mut ChunkedSearchState,
    ) -> std::io::Result<bool> {
        if state.is_done() {
            return Ok(false);
        }

        let chunk_info = state.chunks[state.next_chunk].clone();
        let doc_offset = state.next_doc_offset;

        state.next_chunk += 1;
        state.scanned_bytes += chunk_info.byte_len;
        state.next_doc_offset += chunk_info.byte_len;

        let chunk_bytes = self
            .get_text_range_mut(doc_offset, chunk_info.byte_len)
            .map_err(std::io::Error::other)?;

        // Search buffer = previous overlap tail + this chunk's bytes.
        let overlap_len = state.overlap_tail.len();
        let mut search_buf = Vec::with_capacity(overlap_len + chunk_bytes.len());
        search_buf.extend_from_slice(&state.overlap_tail);
        search_buf.extend_from_slice(&chunk_bytes);

        // Document offset of search_buf[0].
        let buf_doc_offset = if overlap_len > 0 {
            state.overlap_doc_offset
        } else {
            doc_offset
        };

        // `running_line` is the line at the chunk start; back it up by the
        // newlines inside the overlap so line numbers computed from the
        // start of the search buffer come out right.
        let newlines_in_overlap = search_buf[..overlap_len]
            .iter()
            .filter(|&&b| b == b'\n')
            .count();
        let mut line_at = state.running_line - newlines_in_overlap;
        let mut counted_to = 0usize;

        for m in state.regex.find_iter(&search_buf) {
            // Matches ending inside the overlap were already reported while
            // processing the previous chunk.
            if overlap_len > 0 && m.end() <= overlap_len {
                continue;
            }

            if state.matches.len() >= state.max_matches {
                state.capped = true;
                break;
            }

            // Advance the line counter incrementally up to this match.
            line_at += search_buf[counted_to..m.start()]
                .iter()
                .filter(|&&b| b == b'\n')
                .count();
            counted_to = m.start();

            // Locate the surrounding line for the context snippet.
            let line_start = search_buf[..m.start()]
                .iter()
                .rposition(|&b| b == b'\n')
                .map(|p| p + 1)
                .unwrap_or(0);
            let line_end = search_buf[m.start()..]
                .iter()
                .position(|&b| b == b'\n')
                .map(|p| m.start() + p)
                .unwrap_or(search_buf.len());

            let match_doc_offset = buf_doc_offset + m.start();
            let match_len = m.end() - m.start();
            let column = m.start() - line_start + 1;
            let context = String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();

            state.matches.push(SearchMatch {
                byte_offset: match_doc_offset,
                length: match_len,
                line: line_at,
                column,
                context,
            });
        }

        let newlines_in_chunk = chunk_bytes.iter().filter(|&&b| b == b'\n').count();
        state.running_line += newlines_in_chunk;

        // Keep a tail as overlap for the next chunk: at least the query
        // length and at least 256 bytes, capped at the chunk size.
        let max_overlap = state.query_len.max(256).min(chunk_bytes.len());
        let tail_start = chunk_bytes.len().saturating_sub(max_overlap);
        state.overlap_tail = chunk_bytes[tail_start..].to_vec();
        state.overlap_doc_offset = doc_offset + tail_start;

        Ok(!state.is_done())
    }
2817
    /// Run a chunked search to completion in one call, returning the final
    /// state (matches, cap flag, progress counters). See `search_scan_init`
    /// / `search_scan_next_chunk` for the incremental API.
    pub fn search_scan_all(
        &mut self,
        regex: regex::bytes::Regex,
        max_matches: usize,
        query_len: usize,
    ) -> std::io::Result<ChunkedSearchState> {
        let mut state = self.search_scan_init(regex, max_matches, query_len);
        while self.search_scan_next_chunk(&mut state)? {}
        Ok(state)
    }
2832
    /// Build a region-by-region plan for a hybrid (disk + memory) search.
    ///
    /// Walks the leaves in document order, classifying each as either
    /// `Unloaded` (file-backed and not in memory — recorded as a file range
    /// for streaming from disk) or `Loaded` (bytes copied out now).
    /// Adjacent regions of the same kind are coalesced. Returns `None` when
    /// the buffer has no backing file path.
    pub fn search_hybrid_plan(&mut self) -> Option<HybridSearchPlan> {
        let file_path = self.file_path.clone()?;

        self.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
        let leaves = self.piece_tree.get_leaves();

        let mut regions: Vec<SearchRegion> = Vec::new();
        let mut doc_offset = 0usize;

        for leaf in &leaves {
            let buf = self.buffers.get(leaf.location.buffer_id());
            let is_unloaded_stored = matches!(
                (&leaf.location, buf),
                (
                    BufferLocation::Stored(_),
                    Some(StringBuffer {
                        data: BufferData::Unloaded { .. },
                        ..
                    }),
                )
            );

            if is_unloaded_stored {
                // Absolute position of this leaf's bytes within the file.
                let file_offset = match buf.unwrap().data {
                    BufferData::Unloaded {
                        file_offset: fo, ..
                    } => fo + leaf.offset,
                    _ => unreachable!(),
                };

                // Merge with the previous region when contiguous on disk.
                if let Some(SearchRegion::Unloaded {
                    file_offset: prev_fo,
                    bytes: prev_bytes,
                    ..
                }) = regions.last_mut()
                {
                    if *prev_fo + *prev_bytes == file_offset {
                        *prev_bytes += leaf.bytes;
                        doc_offset += leaf.bytes;
                        continue;
                    }
                }
                regions.push(SearchRegion::Unloaded {
                    file_offset,
                    bytes: leaf.bytes,
                    doc_offset,
                });
            } else {
                // Loaded (or loadable) leaf: copy its bytes out now.
                let data = match buf.and_then(|b| b.get_data()) {
                    Some(full) => {
                        let end = (leaf.offset + leaf.bytes).min(full.len());
                        full[leaf.offset..end].to_vec()
                    }
                    None => match self.get_text_range_mut(doc_offset, leaf.bytes) {
                        Ok(d) => d,
                        Err(_) => {
                            // Unreadable leaf: skip it (best-effort search).
                            doc_offset += leaf.bytes;
                            continue;
                        }
                    },
                };

                // Merge consecutive loaded regions into one allocation.
                if let Some(SearchRegion::Loaded {
                    data: prev_data, ..
                }) = regions.last_mut()
                {
                    prev_data.extend_from_slice(&data);
                    doc_offset += leaf.bytes;
                    continue;
                }
                regions.push(SearchRegion::Loaded { data, doc_offset });
            }

            doc_offset += leaf.bytes;
        }

        Some(HybridSearchPlan { file_path, regions })
    }
2921
    /// Search the document, preferring the hybrid strategy: unloaded regions
    /// are scanned straight from disk and loaded regions from memory. Falls
    /// back to the fully chunked in-memory scan when no plan can be built
    /// (i.e. the buffer has no backing file path).
    pub fn search_hybrid(
        &mut self,
        pattern: &str,
        opts: &FileSearchOptions,
        regex: Regex,
        max_matches: usize,
        query_len: usize,
    ) -> io::Result<Vec<SearchMatch>> {
        let plan = match self.search_hybrid_plan() {
            Some(p) => p,
            None => {
                let state = self.search_scan_all(regex, max_matches, query_len)?;
                return Ok(state.matches);
            }
        };
        plan.execute(&*self.fs, pattern, opts, &regex, max_matches, query_len)
    }
2948
    /// Count the line feeds in the byte range covered by `leaf`.
    ///
    /// Loaded buffers are scanned in memory; unloaded buffers are scanned by
    /// the filesystem layer directly from disk, without loading the data
    /// into the buffer.
    pub fn scan_leaf(&self, leaf: &crate::model::piece_tree::LeafData) -> std::io::Result<usize> {
        let buffer_id = leaf.location.buffer_id();
        let buffer = self
            .buffers
            .get(buffer_id)
            .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::NotFound, "buffer not found"))?;

        let count = match &buffer.data {
            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
                // Clamp in case the leaf extends past the buffer end.
                let end = (leaf.offset + leaf.bytes).min(data.len());
                data[leaf.offset..end]
                    .iter()
                    .filter(|&&b| b == b'\n')
                    .count()
            }
            crate::model::piece_tree::BufferData::Unloaded {
                file_path,
                file_offset,
                ..
            } => {
                // Absolute file offset of the leaf's first byte.
                let read_offset = *file_offset as u64 + leaf.offset as u64;
                self.fs
                    .count_line_feeds_in_range(file_path, read_offset, leaf.bytes)?
            }
        };
        Ok(count)
    }
2980
2981 pub fn leaf_io_params(
2986 &self,
2987 leaf: &crate::model::piece_tree::LeafData,
2988 ) -> Option<(std::path::PathBuf, u64, usize)> {
2989 let buffer_id = leaf.location.buffer_id();
2990 let buffer = self.buffers.get(buffer_id)?;
2991 match &buffer.data {
2992 crate::model::piece_tree::BufferData::Loaded { .. } => None,
2993 crate::model::piece_tree::BufferData::Unloaded {
2994 file_path,
2995 file_offset,
2996 ..
2997 } => {
2998 let read_offset = *file_offset as u64 + leaf.offset as u64;
2999 Some((file_path.clone(), read_offset, leaf.bytes))
3000 }
3001 }
3002 }
3003
    /// Borrow the backing string buffers as a read-only slice.
    pub fn buffer_slice(&self) -> &[StringBuffer] {
        &self.buffers
    }

    /// Apply background line-feed scan results as `(leaf_index, lf_count)`
    /// pairs, then mark the document as fully line-scanned.
    pub fn apply_scan_updates(&mut self, updates: &[(usize, usize)]) {
        self.piece_tree.update_leaf_line_feeds(updates);
        self.line_feeds_scanned = true;
    }
3014
    /// Rebuild the piece tree on top of a pristine single-piece representation
    /// of the saved file, replaying the current edits (deletions + insertions)
    /// against it, and record the pristine tree's root as `saved_root`.
    ///
    /// `scan_updates` carries `(leaf_index, lf_count)` pairs for the pristine
    /// tree's chunked leaves. Falls back to `apply_scan_updates` when the
    /// saved file size is unknown.
    pub fn rebuild_with_pristine_saved_root(&mut self, scan_updates: &[(usize, usize)]) {
        let file_size = match self.saved_file_size {
            Some(s) => s,
            None => {
                // No saved snapshot to diff against; just record the scan results.
                self.apply_scan_updates(scan_updates);
                return;
            }
        };

        let total = self.total_bytes();
        // Edits relative to the original file: byte ranges removed from it...
        let mut deletions: Vec<(usize, usize)> = Vec::new();
        // ...and pieces inserted, keyed by how many original bytes precede them.
        let mut insertions: Vec<(usize, BufferLocation, usize, usize, Option<usize>)> = Vec::new();
        let mut orig_cursor: usize = 0;
        let mut stored_bytes_in_doc: usize = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    // A gap between consecutive stored pieces is deleted file content.
                    if piece.buffer_offset > orig_cursor {
                        deletions.push((orig_cursor, piece.buffer_offset - orig_cursor));
                    }
                    orig_cursor = piece.buffer_offset + piece.bytes;
                    stored_bytes_in_doc += piece.bytes;
                }
                BufferLocation::Added(id) => {
                    if let Some(file_off) = self.buffers.get(id).and_then(|b| b.stored_file_offset)
                    {
                        // An added buffer that was already persisted to the file
                        // behaves like stored content at its recorded offset.
                        if file_off > orig_cursor {
                            deletions.push((orig_cursor, file_off - orig_cursor));
                        }
                        orig_cursor = file_off + piece.bytes;
                        stored_bytes_in_doc += piece.bytes;
                    } else {
                        insertions.push((
                            stored_bytes_in_doc,
                            piece.location,
                            piece.buffer_offset,
                            piece.bytes,
                            piece.line_feed_cnt,
                        ));
                    }
                }
            }
        }
        // Anything after the last stored piece was deleted from the file's tail.
        if orig_cursor < file_size {
            deletions.push((orig_cursor, file_size - orig_cursor));
        }

        // Pristine tree: the saved file as one stored span, chunked for lazy IO.
        let mut pristine = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        pristine.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
        pristine.update_leaf_line_feeds(scan_updates);

        self.saved_root = pristine.root();

        if deletions.is_empty() && insertions.is_empty() {
            // Document is unmodified: the pristine tree IS the document.
            self.piece_tree = pristine;
            self.line_feeds_scanned = true;
            return;
        }

        let mut tree = pristine;

        // Apply deletions back-to-front so earlier offsets remain valid.
        deletions.sort_by(|a, b| b.0.cmp(&a.0));
        for &(offset, len) in &deletions {
            tree.delete(offset, len, &self.buffers);
        }

        // Insertions were recorded in document order; as each one lands, the
        // later ones shift right by the bytes inserted so far.
        let mut insert_delta: usize = 0;
        for &(offset, location, buf_offset, bytes, lf_cnt) in &insertions {
            tree.insert(
                offset + insert_delta,
                location,
                buf_offset,
                bytes,
                lf_cnt,
                &self.buffers,
            );
            insert_delta += bytes;
        }

        // Splits during delete/insert may leave leaves with unknown line-feed
        // counts; scan just those and patch the counts in.
        let leaves = tree.get_leaves();
        let mut fixups: Vec<(usize, usize)> = Vec::new();
        for (idx, leaf) in leaves.iter().enumerate() {
            if leaf.line_feed_cnt.is_none() {
                if let Ok(count) = self.scan_leaf(leaf) {
                    fixups.push((idx, count));
                }
            }
        }
        if !fixups.is_empty() {
            tree.update_leaf_line_feeds_path_copy(&fixups);
        }

        self.piece_tree = tree;
        self.line_feeds_scanned = true;
    }
3140
    /// Resolve the starting byte offset of `target_line` by locating the
    /// piece known to contain it and counting the remaining newlines inside
    /// that piece's bytes (reading from disk if the piece is unloaded).
    pub fn resolve_line_byte_offset(&mut self, target_line: usize) -> Option<usize> {
        if target_line == 0 {
            return Some(0);
        }

        let (doc_offset, buffer_id, piece_offset, piece_bytes, lines_before) =
            self.piece_tree.piece_info_for_line(target_line)?;

        // Newlines still to be skipped once inside this piece.
        let lines_to_skip = target_line - lines_before;

        let buffer = self.buffers.get(buffer_id)?;
        let piece_data: Vec<u8> = match &buffer.data {
            crate::model::piece_tree::BufferData::Loaded { data, .. } => {
                let end = (piece_offset + piece_bytes).min(data.len());
                data[piece_offset..end].to_vec()
            }
            crate::model::piece_tree::BufferData::Unloaded {
                file_path,
                file_offset,
                ..
            } => {
                let read_offset = *file_offset as u64 + piece_offset as u64;
                self.fs
                    .read_range(file_path, read_offset, piece_bytes)
                    .ok()?
            }
        };

        // The target line starts one byte past its preceding '\n'.
        let mut newlines_found = 0;
        for (i, &byte) in piece_data.iter().enumerate() {
            if byte == b'\n' {
                newlines_found += 1;
                if newlines_found == lines_to_skip {
                    return Some(doc_offset + i + 1);
                }
            }
        }

        // Fewer newlines than expected: fall back to the end of the piece.
        Some(doc_offset + piece_bytes)
    }
3193
    /// Size in bytes of the backing file as of the last save/load, if known.
    pub fn original_file_size(&self) -> Option<usize> {
        self.saved_file_size
    }
3202
    /// Collect the document's unsaved (in-memory) content as
    /// `(stored_bytes_before, bytes)` pairs for crash recovery.
    ///
    /// The first element of each pair is the number of file-backed bytes that
    /// precede the chunk in document order, which lets recovery re-anchor the
    /// chunk against the on-disk file.
    pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
        use crate::model::piece_tree::BufferLocation;

        let mut chunks = Vec::new();
        let total = self.total_bytes();

        let mut stored_bytes_before = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    stored_bytes_before += piece.bytes;
                }
                BufferLocation::Added(buffer_id) => {
                    if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
                        // Added buffers already persisted to the file count as
                        // stored content, not as recovery data.
                        if buffer.stored_file_offset.is_some() {
                            stored_bytes_before += piece.bytes;
                            continue;
                        }
                        if let Some(data) = buffer.get_data() {
                            let start = piece.buffer_offset;
                            let end = start + piece.bytes;
                            // Skip out-of-range pieces rather than panic.
                            if end <= data.len() {
                                chunks.push((stored_bytes_before, data[start..end].to_vec()));
                            }
                        }
                    }
                }
            }
        }

        chunks
    }
3271
    /// Whether the content was detected as binary (non-text) on load.
    pub fn is_binary(&self) -> bool {
        self.is_binary
    }

    /// The line ending currently used for this document.
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }

    /// Change the line ending and mark the document as modified.
    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.mark_content_modified();
    }

    /// Set the line ending without dirtying the document, and record it as
    /// the original value so it is not reported as a change.
    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.original_line_ending = line_ending;
    }

    /// The character encoding currently used for this document.
    pub fn encoding(&self) -> Encoding {
        self.encoding
    }

    /// Change the encoding and mark the document as modified.
    pub fn set_encoding(&mut self, encoding: Encoding) {
        self.encoding = encoding;
        self.mark_content_modified();
    }

    /// Set the encoding without dirtying the document, and record it as the
    /// original value so it is not reported as a change.
    pub fn set_default_encoding(&mut self, encoding: Encoding) {
        self.encoding = encoding;
        self.original_encoding = encoding;
    }
3322
3323 pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
3328 let check_len = bytes.len().min(8 * 1024);
3330 let sample = &bytes[..check_len];
3331
3332 let mut crlf_count = 0;
3333 let mut lf_only_count = 0;
3334 let mut cr_only_count = 0;
3335
3336 let mut i = 0;
3337 while i < sample.len() {
3338 if sample[i] == b'\r' {
3339 if i + 1 < sample.len() && sample[i + 1] == b'\n' {
3341 crlf_count += 1;
3342 i += 2; continue;
3344 } else {
3345 cr_only_count += 1;
3347 }
3348 } else if sample[i] == b'\n' {
3349 lf_only_count += 1;
3351 }
3352 i += 1;
3353 }
3354
3355 if crlf_count > lf_only_count && crlf_count > cr_only_count {
3357 LineEnding::CRLF
3358 } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
3359 LineEnding::CR
3360 } else {
3361 LineEnding::LF
3363 }
3364 }
3365
    /// Detect the character encoding of `bytes` (delegates to the encoding
    /// module's detector).
    pub fn detect_encoding(bytes: &[u8]) -> Encoding {
        encoding::detect_encoding(bytes)
    }

    /// Detect the encoding and whether the content looks binary. `truncated`
    /// indicates that `bytes` is only a prefix of the full content.
    pub fn detect_encoding_or_binary(bytes: &[u8], truncated: bool) -> (Encoding, bool) {
        encoding::detect_encoding_or_binary(bytes, truncated)
    }

    /// Detect the encoding of `bytes` and convert the content to UTF-8,
    /// returning both.
    pub fn detect_and_convert_encoding(bytes: &[u8]) -> (Encoding, Vec<u8>) {
        encoding::detect_and_convert(bytes)
    }

    /// Convert UTF-8 content into `target_encoding` (e.g. for writing to disk).
    pub fn convert_to_encoding(utf8_bytes: &[u8], target_encoding: Encoding) -> Vec<u8> {
        encoding::convert_from_utf8(utf8_bytes, target_encoding)
    }
3401
3402 #[allow(dead_code)] pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
3409 let mut normalized = Vec::with_capacity(bytes.len());
3410 let mut i = 0;
3411
3412 while i < bytes.len() {
3413 if bytes[i] == b'\r' {
3414 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3416 normalized.push(b'\n');
3418 i += 2; continue;
3420 } else {
3421 normalized.push(b'\n');
3423 }
3424 } else {
3425 normalized.push(bytes[i]);
3427 }
3428 i += 1;
3429 }
3430
3431 normalized
3432 }
3433
3434 fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
3439 let mut normalized = Vec::with_capacity(bytes.len());
3441 let mut i = 0;
3442 while i < bytes.len() {
3443 if bytes[i] == b'\r' {
3444 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
3446 normalized.push(b'\n');
3448 i += 2;
3449 continue;
3450 } else {
3451 normalized.push(b'\n');
3453 }
3454 } else {
3455 normalized.push(bytes[i]);
3456 }
3457 i += 1;
3458 }
3459
3460 if target_ending == LineEnding::LF {
3462 return normalized;
3463 }
3464
3465 let replacement = target_ending.as_str().as_bytes();
3467 let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
3468
3469 for byte in normalized {
3470 if byte == b'\n' {
3471 result.extend_from_slice(replacement);
3472 } else {
3473 result.push(byte);
3474 }
3475 }
3476
3477 result
3478 }
3479
3480 pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
3482 let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
3483
3484 let bytes = if let Some(end_offset) = end {
3485 end_offset.saturating_sub(start)
3486 } else {
3487 self.total_bytes().saturating_sub(start)
3488 };
3489
3490 self.get_text_range(start, bytes)
3491 }
3492
    /// Byte offset at which line `line` begins, if the line exists.
    pub fn line_start_offset(&self, line: usize) -> Option<usize> {
        let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
        Some(start)
    }

    /// Piece-tree metadata for the piece containing `offset`.
    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
        self.piece_tree.find_by_offset(offset)
    }

    /// Structural statistics of the underlying piece tree.
    pub fn stats(&self) -> TreeStats {
        self.piece_tree.stats()
    }
3508
3509 pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
3513 if pattern.is_empty() {
3514 return None;
3515 }
3516
3517 let pattern_bytes = pattern.as_bytes();
3518 let buffer_len = self.len();
3519
3520 if start_pos < buffer_len {
3522 if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
3523 return Some(offset);
3524 }
3525 }
3526
3527 if start_pos > 0 {
3529 if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
3530 return Some(offset);
3531 }
3532 }
3533
3534 None
3535 }
3536
3537 pub fn find_next_in_range(
3541 &self,
3542 pattern: &str,
3543 start_pos: usize,
3544 range: Option<Range<usize>>,
3545 ) -> Option<usize> {
3546 if pattern.is_empty() {
3547 return None;
3548 }
3549
3550 if let Some(search_range) = range {
3551 let pattern_bytes = pattern.as_bytes();
3553 let search_start = start_pos.max(search_range.start);
3554 let search_end = search_range.end.min(self.len());
3555
3556 if search_start < search_end {
3557 self.find_pattern(search_start, search_end, pattern_bytes)
3558 } else {
3559 None
3560 }
3561 } else {
3562 self.find_next(pattern, start_pos)
3564 }
3565 }
3566
    /// Scan `[start, end)` for the first occurrence of `pattern`, streaming
    /// the range in overlapping chunks so matches crossing a chunk edge are
    /// still found. Returns the match's document byte offset.
    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
        if pattern.is_empty() || start >= end {
            return None;
        }

        // 64 KiB chunks, overlapped by |pattern| - 1 bytes so a straddling
        // match appears whole in one chunk (min 1 to keep chunks advancing).
        const CHUNK_SIZE: usize = 65536;
        let overlap = pattern.len().saturating_sub(1).max(1);

        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);

        for chunk in chunks {
            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
                let match_end = pos + pattern.len();
                // Ignore matches that end inside the overlap: the previous
                // chunk already covered them.
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + pos;
                    // Make sure the whole match sits inside the requested range.
                    if absolute_pos + pattern.len() <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
3597
3598 fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
3600 if needle.is_empty() || needle.len() > haystack.len() {
3601 return None;
3602 }
3603
3604 (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
3605 }
3606
3607 pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
3609 let buffer_len = self.len();
3610
3611 if start_pos < buffer_len {
3613 if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
3614 return Some(offset);
3615 }
3616 }
3617
3618 if start_pos > 0 {
3620 if let Some(offset) = self.find_regex(0, start_pos, regex) {
3621 return Some(offset);
3622 }
3623 }
3624
3625 None
3626 }
3627
3628 pub fn find_next_regex_in_range(
3630 &self,
3631 regex: &Regex,
3632 start_pos: usize,
3633 range: Option<Range<usize>>,
3634 ) -> Option<usize> {
3635 if let Some(search_range) = range {
3636 let search_start = start_pos.max(search_range.start);
3637 let search_end = search_range.end.min(self.len());
3638
3639 if search_start < search_end {
3640 self.find_regex(search_start, search_end, regex)
3641 } else {
3642 None
3643 }
3644 } else {
3645 self.find_next_regex(regex, start_pos)
3646 }
3647 }
3648
    /// Regex counterpart of `find_pattern`: stream `[start, end)` in 1 MiB
    /// chunks with a fixed 4 KiB overlap. Regex match length is unbounded,
    /// so the overlap is a heuristic rather than a guarantee — matches longer
    /// than the overlap could be missed at chunk edges.
    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
        if start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 1048576;
        const OVERLAP: usize = 4096;
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);

        for chunk in chunks {
            if let Some(mat) = regex.find(&chunk.buffer) {
                let match_end = mat.end();
                // Ignore matches that end inside the overlap: the previous
                // chunk already covered them.
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + mat.start();
                    let match_len = mat.end() - mat.start();
                    // Make sure the whole match sits inside the requested range.
                    if absolute_pos + match_len <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
3681
3682 pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
3684 if range.start >= self.len() {
3685 return false;
3686 }
3687
3688 let end = range.end.min(self.len());
3689 if end > range.start {
3690 self.delete_bytes(range.start, end - range.start);
3691 }
3692
3693 if !replacement.is_empty() {
3694 self.insert(range.start, replacement);
3695 }
3696
3697 true
3698 }
3699
3700 pub fn replace_next(
3702 &mut self,
3703 pattern: &str,
3704 replacement: &str,
3705 start_pos: usize,
3706 range: Option<Range<usize>>,
3707 ) -> Option<usize> {
3708 if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
3709 self.replace_range(pos..pos + pattern.len(), replacement);
3710 Some(pos)
3711 } else {
3712 None
3713 }
3714 }
3715
3716 pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
3718 if pattern.is_empty() {
3719 return 0;
3720 }
3721
3722 let mut count = 0;
3723 let mut pos = 0;
3724
3725 while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
3729 self.replace_range(found_pos..found_pos + pattern.len(), replacement);
3730 count += 1;
3731
3732 pos = found_pos + replacement.len();
3734
3735 if pos >= self.len() {
3737 break;
3738 }
3739 }
3740
3741 count
3742 }
3743
    /// Replace every regex match with `replacement` (inserted literally),
    /// returning the replacement count.
    ///
    /// # Errors
    /// Fails when the matched text cannot be read back from the buffer.
    pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
        let mut count = 0;
        let mut pos = 0;

        while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
            // Re-run the regex on the suffix to learn the match length.
            let text = self
                .get_text_range_mut(found_pos, self.len() - found_pos)
                .context("Failed to read text for regex match")?;

            // NOTE(review): this assumes the first match in the suffix starts
            // exactly at found_pos (mat.start() == 0); if it started later the
            // replaced range would be wrong — confirm.
            if let Some(mat) = regex.find(&text) {
                self.replace_range(found_pos..found_pos + mat.len(), replacement);
                count += 1;
                // Resume after the replacement so it cannot be re-matched.
                pos = found_pos + replacement.len();

                if pos >= self.len() {
                    break;
                }
            } else {
                break;
            }
        }

        Ok(count)
    }
3770
    /// Convert a byte offset into a byte-based `(line, column)` pair.
    ///
    /// Falls back to an 80-bytes-per-line estimate when the offset cannot be
    /// resolved (e.g. line metadata not yet scanned).
    pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
        self.offset_to_position(byte_pos)
            .map(|pos| (pos.line, pos.column))
            .unwrap_or_else(|| (byte_pos / 80, 0)) // heuristic fallback
    }

    /// Convert a byte-based `(line, character)` pair back to a byte offset,
    /// clamping the column to the line length; an unknown line maps to the
    /// end of the buffer.
    pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
        if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
            // `None` end means the line runs to the end of the document.
            let line_len = if let Some(end_offset) = end {
                end_offset.saturating_sub(start)
            } else {
                self.total_bytes().saturating_sub(start)
            };
            let byte_offset = character.min(line_len);
            start + byte_offset
        } else {
            self.len()
        }
    }
3798
    /// Convert a byte offset into an LSP position: `(line, UTF-16 column)`.
    ///
    /// LSP columns count UTF-16 code units, so the line's bytes up to the
    /// byte column are re-measured in UTF-16.
    pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
        let (line, column_bytes) = self
            .offset_to_position(byte_pos)
            .map(|pos| (pos.line, pos.column))
            .unwrap_or_else(|| (byte_pos / 80, 0)); // heuristic fallback
        if let Some(line_bytes) = self.get_line(line) {
            // Clamp in case the column exceeds the materialized line length.
            let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
            let text_str = String::from_utf8_lossy(text_before);
            let utf16_offset = text_str.encode_utf16().count();
            (line, utf16_offset)
        } else {
            (line, 0)
        }
    }
3818
    /// Convert an LSP `(line, UTF-16 column)` position into a byte offset.
    ///
    /// Walks the line's characters accumulating UTF-16 code units until the
    /// requested column is reached; an unknown line maps to the buffer end.
    pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
        if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
            // `None` end means the line runs to the end of the document.
            let line_len = if let Some(end_offset) = end {
                end_offset.saturating_sub(line_start)
            } else {
                self.total_bytes().saturating_sub(line_start)
            };

            if line_len > 0 {
                let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
                    return line_start;
                };
                let line_str = String::from_utf8_lossy(&line_bytes);

                // Accumulate UTF-16 units and UTF-8 bytes in lockstep.
                let mut utf16_count = 0;
                let mut byte_offset = 0;

                for ch in line_str.chars() {
                    if utf16_count >= utf16_offset {
                        break;
                    }
                    utf16_count += ch.len_utf16();
                    byte_offset += ch.len_utf8();
                }

                line_start + byte_offset
            } else {
                line_start
            }
        } else {
            self.len()
        }
    }
3859
3860 pub fn prev_char_boundary(&self, pos: usize) -> usize {
3864 if pos == 0 {
3865 return 0;
3866 }
3867
3868 let start = pos.saturating_sub(4);
3870 let Some(bytes) = self.get_text_range(start, pos - start) else {
3871 return pos;
3873 };
3874
3875 for i in (0..bytes.len()).rev() {
3877 let byte = bytes[i];
3878 if (byte & 0b1100_0000) != 0b1000_0000 {
3880 return start + i;
3881 }
3882 }
3883
3884 pos.saturating_sub(1)
3886 }
3887
3888 pub fn next_char_boundary(&self, pos: usize) -> usize {
3890 let len = self.len();
3891 if pos >= len {
3892 return len;
3893 }
3894
3895 let end = (pos + 5).min(len);
3897 let Some(bytes) = self.get_text_range(pos, end - pos) else {
3898 return pos;
3900 };
3901
3902 for (i, &byte) in bytes.iter().enumerate().skip(1) {
3904 if (byte & 0b1100_0000) != 0b1000_0000 {
3906 return pos + i;
3907 }
3908 }
3909
3910 end
3912 }
3913
3914 #[inline]
3918 fn is_utf8_continuation_byte(byte: u8) -> bool {
3919 (byte & 0b1100_0000) == 0b1000_0000
3920 }
3921
3922 pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
3926 let len = self.len();
3927 if pos == 0 || pos >= len {
3928 return pos.min(len);
3929 }
3930
3931 let Some(bytes) = self.get_text_range(pos, 1) else {
3933 return pos;
3935 };
3936
3937 if !Self::is_utf8_continuation_byte(bytes[0]) {
3939 return pos;
3941 }
3942
3943 self.prev_char_boundary(pos)
3945 }
3946
    /// Previous grapheme-cluster boundary before `pos` (Unicode-aware, so a
    /// combining sequence or emoji ZWJ sequence moves as one unit).
    ///
    /// Reads a small window (≤32 bytes) before `pos`; if the boundary falls
    /// at the window start, recurses to extend the search leftward. Falls
    /// back to plain char boundaries on invalid UTF-8 or unreadable ranges.
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        let raw_start = pos.saturating_sub(32);
        // Snap the window start onto a char boundary so it slices valid UTF-8.
        let start = if raw_start == 0 {
            0
        } else {
            self.prev_char_boundary(raw_start + 1)
        };

        let Some(bytes) = self.get_text_range(start, pos - start) else {
            return self.prev_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Invalid UTF-8: use the valid prefix if there is one.
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.prev_char_boundary(pos),
                }
            }
        };

        let rel_pos = pos - start;
        let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

        // Landed exactly on the window start: the cluster may begin even
        // earlier, so widen the search from there.
        if new_rel_pos == 0 && start > 0 {
            return self.prev_grapheme_boundary(start);
        }

        start + new_rel_pos
    }
3999
    /// Next grapheme-cluster boundary after `pos` (Unicode-aware).
    ///
    /// Reads a 32-byte window starting at `pos`; falls back to plain char
    /// boundaries on invalid UTF-8 or unreadable ranges.
    ///
    /// NOTE(review): unlike `prev_grapheme_boundary`, the window is not
    /// widened when a cluster might extend past it — confirm 32 bytes is
    /// sufficient for the clusters this editor must handle.
    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
        let len = self.len();
        if pos >= len {
            return len;
        }

        let end = (pos + 32).min(len);
        let Some(bytes) = self.get_text_range(pos, end - pos) else {
            return self.next_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Invalid UTF-8: use the valid prefix if there is one.
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.next_char_boundary(pos),
                }
            }
        };

        let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
        pos + new_rel_pos
    }
4038
4039 pub fn prev_word_boundary(&self, pos: usize) -> usize {
4041 if pos == 0 {
4042 return 0;
4043 }
4044
4045 let start = pos.saturating_sub(256).max(0);
4047 let Some(bytes) = self.get_text_range(start, pos - start) else {
4048 return pos;
4050 };
4051 let text = String::from_utf8_lossy(&bytes);
4052
4053 let mut found_word_char = false;
4054 let chars: Vec<char> = text.chars().collect();
4055
4056 for i in (0..chars.len()).rev() {
4057 let ch = chars[i];
4058 let is_word_char = ch.is_alphanumeric() || ch == '_';
4059
4060 if found_word_char && !is_word_char {
4061 let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
4064 return start + byte_offset;
4065 }
4066
4067 if is_word_char {
4068 found_word_char = true;
4069 }
4070 }
4071
4072 0
4073 }
4074
4075 pub fn next_word_boundary(&self, pos: usize) -> usize {
4077 let len = self.len();
4078 if pos >= len {
4079 return len;
4080 }
4081
4082 let end = (pos + 256).min(len);
4084 let Some(bytes) = self.get_text_range(pos, end - pos) else {
4085 return pos;
4087 };
4088 let text = String::from_utf8_lossy(&bytes);
4089
4090 let mut found_word_char = false;
4091 let mut byte_offset = 0;
4092
4093 for ch in text.chars() {
4094 let is_word_char = ch.is_alphanumeric() || ch == '_';
4095
4096 if found_word_char && !is_word_char {
4097 return pos + byte_offset;
4099 }
4100
4101 if is_word_char {
4102 found_word_char = true;
4103 }
4104
4105 byte_offset += ch.len_utf8();
4106 }
4107
4108 len
4109 }
4110
    /// Low-level line iterator starting at `byte_pos`;
    /// `estimated_line_length` tunes the iterator's read-ahead.
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }

    /// Iterate at most `max_lines` lines starting at `byte_pos`.
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
4143
    /// Line number containing `byte_offset`; falls back to an estimate based
    /// on the configured average line length when the offset can't be resolved.
    pub fn get_line_number(&self, byte_offset: usize) -> usize {
        self.offset_to_position(byte_offset)
            .map(|pos| pos.line)
            .unwrap_or_else(|| {
                // heuristic fallback for unresolvable offsets
                byte_offset / self.config.estimated_line_length
            })
    }

    /// Configured average line length used by estimation fallbacks.
    pub fn estimated_line_length(&self) -> usize {
        self.config.estimated_line_length
    }

    /// Resolves the line number at `start_byte`; `_line_count` is ignored.
    /// NOTE(review): appears to be kept for compatibility with an older
    /// line-cache API — confirm before removing.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        self.get_line_number(start_byte)
    }

    /// Resolves the line's byte offset directly (no separate cache involved).
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }

    /// No-op; kept so callers of the old line-cache API still compile.
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
    }

    /// No-op; kept so callers of the old line-cache API still compile.
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
    }

    /// No-op; kept so callers of the old line-cache API still compile.
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
    }

    /// No-op; kept so callers of the old line-cache API still compile.
    pub fn clear_line_cache(&mut self) {
    }
4233
    /// Test helper: build a buffer from a string, backed by the real filesystem.
    #[cfg(test)]
    pub fn from_str_test(s: &str) -> Self {
        Self::from_bytes(
            s.as_bytes().to_vec(),
            std::sync::Arc::new(crate::model::filesystem::StdFileSystem),
        )
    }

    /// Test helper: an empty buffer backed by the real filesystem.
    #[cfg(test)]
    pub fn new_test() -> Self {
        Self::empty(std::sync::Arc::new(crate::model::filesystem::StdFileSystem))
    }
4250}
4251
4252pub type Buffer = TextBuffer;
4254
4255pub use crate::primitives::line_iterator::LineIterator;
4257
/// One chunk produced by [`OverlappingChunks`].
#[derive(Debug)]
pub struct ChunkInfo {
    /// The chunk's bytes, including the leading overlap carried over from
    /// the previous chunk.
    pub buffer: Vec<u8>,

    /// Document byte offset of `buffer[0]`.
    pub absolute_pos: usize,

    /// Index into `buffer` where the fresh (non-overlap) bytes begin;
    /// callers treat matches ending at or before this index as already
    /// reported by the previous chunk.
    pub valid_start: usize,
}
4275
/// Streams a byte range of a `TextBuffer` as overlapping chunks so that
/// pattern/regex matches spanning a chunk edge are not missed.
pub struct OverlappingChunks<'a> {
    // Pieces covering the requested document range.
    piece_iter: PieceRangeIter,
    buffers: &'a [StringBuffer],

    // Current chunk contents (leading overlap + fresh bytes).
    buffer: Vec<u8>,
    // Document offset of buffer[0].
    buffer_absolute_pos: usize,

    // Next document position to read, and the exclusive range end.
    current_pos: usize,
    end_pos: usize,

    // Fresh bytes per chunk.
    chunk_size: usize,
    // Bytes carried over between consecutive chunks.
    overlap: usize,

    // True until the first chunk has been filled.
    first_chunk: bool,

    // Bytes of the piece currently being drained, plus the read position in it.
    current_piece_data: Option<Vec<u8>>,
    current_piece_offset: usize,
}
4326
impl<'a> OverlappingChunks<'a> {
    /// Create an iterator over `[start, end)` of `text_buffer`, producing
    /// chunks of `chunk_size` fresh bytes that each carry `overlap` bytes
    /// from the previous chunk.
    pub fn new(
        text_buffer: &'a TextBuffer,
        start: usize,
        end: usize,
        chunk_size: usize,
        overlap: usize,
    ) -> Self {
        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);

        Self {
            piece_iter,
            buffers: &text_buffer.buffers,
            buffer: Vec::with_capacity(chunk_size + overlap),
            buffer_absolute_pos: start,
            current_pos: start,
            end_pos: end,
            chunk_size,
            overlap,
            first_chunk: true,
            current_piece_data: None,
            current_piece_offset: 0,
        }
    }

    /// Pull one byte from the current piece, advancing to the next piece
    /// when the current one is drained. Returns `None` at end of range.
    fn read_byte(&mut self) -> Option<u8> {
        loop {
            // Drain the staged piece data first.
            if let Some(ref data) = self.current_piece_data {
                if self.current_piece_offset < data.len() {
                    let byte = data[self.current_piece_offset];
                    self.current_piece_offset += 1;
                    self.current_pos += 1;
                    return Some(byte);
                } else {
                    self.current_piece_data = None;
                    self.current_piece_offset = 0;
                }
            }

            // Stage the next piece's bytes (clipped to the requested range).
            if let Some(piece_view) = self.piece_iter.next() {
                let buffer_id = piece_view.location.buffer_id();
                if let Some(buffer) = self.buffers.get(buffer_id) {
                    let piece_start_in_doc = piece_view.doc_offset;
                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                    let read_start = self.current_pos.max(piece_start_in_doc);
                    let read_end = self.end_pos.min(piece_end_in_doc);

                    if read_end > read_start {
                        let offset_in_piece = read_start - piece_start_in_doc;
                        let bytes_to_read = read_end - read_start;

                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
                        let buffer_end = buffer_start + bytes_to_read;

                        // Only buffers with in-memory data can be staged here.
                        if let Some(data) = buffer.get_data() {
                            if buffer_end <= data.len() {
                                self.current_piece_data =
                                    Some(data[buffer_start..buffer_end].to_vec());
                                self.current_piece_offset = 0;
                                continue;
                            }
                        }
                    }
                }
            }

            // NOTE(review): reaching here on an unreadable/empty piece ends
            // iteration instead of skipping to the next piece — confirm that
            // such pieces cannot occur mid-range.
            return None;
        }
    }

    /// Fill the buffer for the next chunk. Returns `false` when no new
    /// bytes could be produced (iteration is complete).
    fn fill_next_chunk(&mut self) -> bool {
        if self.first_chunk {
            self.first_chunk = false;
            // First chunk: no overlap, just read up to chunk_size bytes.
            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }
            !self.buffer.is_empty()
        } else {
            if self.current_pos >= self.end_pos {
                return false;
            }

            // Keep only the last `overlap` bytes of the previous chunk.
            if self.buffer.len() > self.overlap {
                let drain_amount = self.buffer.len() - self.overlap;
                self.buffer.drain(0..drain_amount);
                self.buffer_absolute_pos += drain_amount;
            }

            // Append fresh bytes until overlap + chunk_size is reached.
            let before_len = self.buffer.len();
            let target_len = self.overlap + self.chunk_size;
            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }

            // Progress was made only if new bytes actually arrived.
            self.buffer.len() > before_len
        }
    }
}
4462
impl<'a> Iterator for OverlappingChunks<'a> {
    type Item = ChunkInfo;

    fn next(&mut self) -> Option<Self::Item> {
        // Before the first fill, nothing has been consumed yet, so the
        // buffer origin and the read cursor still coincide.
        let is_first = self.buffer_absolute_pos == self.current_pos;

        if !self.fill_next_chunk() {
            return None;
        }

        // For later chunks the leading `overlap` bytes repeat the previous
        // chunk's tail; `valid_start` lets callers avoid double-reporting.
        let valid_start = if is_first {
            0
        } else {
            self.overlap.min(self.buffer.len())
        };

        Some(ChunkInfo {
            buffer: self.buffer.clone(),
            absolute_pos: self.buffer_absolute_pos,
            valid_start,
        })
    }
}
4489
/// A contiguous slice of the document, classified for hybrid search.
#[derive(Debug)]
pub(crate) enum SearchRegion {
    /// Bytes still on disk: searched via the filesystem without loading them.
    Unloaded {
        file_offset: usize,
        bytes: usize,
        doc_offset: usize,
    },
    /// Bytes already in memory (loaded or edited): searched directly.
    Loaded { data: Vec<u8>, doc_offset: usize },
}
4503
/// An ordered partition of the document into on-disk and in-memory regions,
/// used to search the document without paging the whole file into memory.
#[derive(Debug)]
pub struct HybridSearchPlan {
    pub(crate) file_path: PathBuf,
    pub(crate) regions: Vec<SearchRegion>,
}
4516
impl HybridSearchPlan {
    /// Run the search over every region in document order.
    ///
    /// Unloaded regions are searched through `fs` (streamed from disk);
    /// loaded regions are searched in memory. A tail buffer (`prev_tail`,
    /// sized `max(query_len, 256)`) is carried across region boundaries so
    /// matches straddling a boundary are still found, and `running_line`
    /// keeps 1-based line numbers consistent across regions.
    pub fn execute(
        &self,
        fs: &dyn FileSystem,
        pattern: &str,
        opts: &FileSearchOptions,
        regex: &Regex,
        max_matches: usize,
        query_len: usize,
    ) -> io::Result<Vec<SearchMatch>> {
        if self.regions.is_empty() {
            return Ok(vec![]);
        }

        // Fast path: an unmodified file is one big unloaded region —
        // delegate the entire search to the filesystem.
        if self.regions.len() == 1 {
            if let SearchRegion::Unloaded { .. } = &self.regions[0] {
                let mut cursor = FileSearchCursor::new();
                let mut all_matches = Vec::new();
                while !cursor.done && all_matches.len() < max_matches {
                    let batch = fs.search_file(&self.file_path, pattern, opts, &mut cursor)?;
                    all_matches.extend(batch);
                }
                all_matches.truncate(max_matches);
                return Ok(all_matches);
            }
        }

        let overlap_size = query_len.max(256);
        let mut all_matches: Vec<SearchMatch> = Vec::new();
        let mut running_line: usize = 1;
        let mut prev_tail: Vec<u8> = Vec::new();

        for region in &self.regions {
            if all_matches.len() >= max_matches {
                break;
            }
            let remaining = max_matches - all_matches.len();

            match region {
                SearchRegion::Unloaded {
                    file_offset,
                    bytes,
                    doc_offset: region_doc_offset,
                } => {
                    // Check for matches straddling the boundary between the
                    // previous region's tail and this region's head.
                    if !prev_tail.is_empty() {
                        let overlap_read = (*bytes).min(overlap_size);
                        if let Ok(head) =
                            fs.read_range(&self.file_path, *file_offset as u64, overlap_read)
                        {
                            let boundary = search_boundary_overlap(
                                &prev_tail,
                                &head,
                                *region_doc_offset - prev_tail.len(),
                                running_line,
                                regex,
                                remaining,
                            );
                            all_matches.extend(boundary);
                        }
                    }

                    // Stream-search just this file range via the filesystem.
                    let mut opts_bounded = opts.clone();
                    opts_bounded.max_matches = remaining.saturating_sub(all_matches.len());
                    let mut cursor = FileSearchCursor::for_range(
                        *file_offset,
                        *file_offset + *bytes,
                        running_line,
                    );
                    while !cursor.done && all_matches.len() < max_matches {
                        let mut batch =
                            fs.search_file(&self.file_path, pattern, &opts_bounded, &mut cursor)?;
                        // Translate file offsets into document offsets.
                        for m in &mut batch {
                            m.byte_offset = *region_doc_offset + (m.byte_offset - *file_offset);
                        }
                        all_matches.extend(batch);
                    }
                    running_line = cursor.running_line;

                    // Remember this region's tail for the next boundary check.
                    if *bytes >= overlap_size {
                        let tail_off = *file_offset + *bytes - overlap_size;
                        prev_tail = fs
                            .read_range(&self.file_path, tail_off as u64, overlap_size)
                            .unwrap_or_default();
                    } else {
                        prev_tail = fs
                            .read_range(&self.file_path, *file_offset as u64, *bytes)
                            .unwrap_or_default();
                    }
                }
                SearchRegion::Loaded {
                    data,
                    doc_offset: region_doc_offset,
                } => {
                    // Search previous tail + this region in one pass so
                    // boundary-straddling matches are found; matches fully
                    // inside the tail were already reported.
                    let mut search_buf = Vec::with_capacity(prev_tail.len() + data.len());
                    search_buf.extend_from_slice(&prev_tail);
                    search_buf.extend_from_slice(data);

                    let overlap_len = prev_tail.len();
                    let buf_doc_offset = if overlap_len > 0 {
                        *region_doc_offset - overlap_len
                    } else {
                        *region_doc_offset
                    };

                    // Line number at the start of search_buf: back out the
                    // newlines contained in the prepended tail.
                    let newlines_in_overlap = search_buf[..overlap_len]
                        .iter()
                        .filter(|&&b| b == b'\n')
                        .count();
                    let mut line_at = running_line.saturating_sub(newlines_in_overlap);
                    let mut counted_to = 0usize;

                    for m in regex.find_iter(&search_buf) {
                        // Skip matches fully inside the overlap (already reported).
                        if overlap_len > 0 && m.end() <= overlap_len {
                            continue;
                        }
                        if all_matches.len() >= max_matches {
                            break;
                        }

                        // Advance the line counter incrementally to this match.
                        line_at += search_buf[counted_to..m.start()]
                            .iter()
                            .filter(|&&b| b == b'\n')
                            .count();
                        counted_to = m.start();

                        // Extract the full line containing the match as context.
                        let line_start = search_buf[..m.start()]
                            .iter()
                            .rposition(|&b| b == b'\n')
                            .map(|p| p + 1)
                            .unwrap_or(0);
                        let line_end = search_buf[m.start()..]
                            .iter()
                            .position(|&b| b == b'\n')
                            .map(|p| m.start() + p)
                            .unwrap_or(search_buf.len());

                        let match_doc_offset = buf_doc_offset + m.start();
                        let column = m.start() - line_start + 1;
                        let context =
                            String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();

                        all_matches.push(SearchMatch {
                            byte_offset: match_doc_offset,
                            length: m.end() - m.start(),
                            line: line_at,
                            column,
                            context,
                        });
                    }

                    running_line += data.iter().filter(|&&b| b == b'\n').count();

                    // Carry this region's tail into the next boundary check.
                    let tail_start = data.len().saturating_sub(overlap_size);
                    prev_tail = data[tail_start..].to_vec();
                }
            }
        }

        all_matches.truncate(max_matches);
        Ok(all_matches)
    }
}
4687
4688fn search_boundary_overlap(
4694 prev_tail: &[u8],
4695 next_head: &[u8],
4696 doc_offset: usize,
4697 running_line: usize,
4698 regex: &Regex,
4699 max_matches: usize,
4700) -> Vec<SearchMatch> {
4701 let mut buf = Vec::with_capacity(prev_tail.len() + next_head.len());
4702 buf.extend_from_slice(prev_tail);
4703 buf.extend_from_slice(next_head);
4704
4705 let overlap_len = prev_tail.len();
4706 let newlines_before = prev_tail.iter().filter(|&&b| b == b'\n').count();
4707 let mut line_at = running_line.saturating_sub(newlines_before);
4708 let mut counted_to = 0usize;
4709 let mut matches = Vec::new();
4710
4711 for m in regex.find_iter(&buf) {
4712 if m.start() < overlap_len && m.end() > overlap_len {
4714 if matches.len() >= max_matches {
4715 break;
4716 }
4717
4718 line_at += buf[counted_to..m.start()]
4719 .iter()
4720 .filter(|&&b| b == b'\n')
4721 .count();
4722 counted_to = m.start();
4723
4724 let line_start = buf[..m.start()]
4725 .iter()
4726 .rposition(|&b| b == b'\n')
4727 .map(|p| p + 1)
4728 .unwrap_or(0);
4729 let line_end = buf[m.start()..]
4730 .iter()
4731 .position(|&b| b == b'\n')
4732 .map(|p| m.start() + p)
4733 .unwrap_or(buf.len());
4734
4735 let column = m.start() - line_start + 1;
4736 let context = String::from_utf8_lossy(&buf[line_start..line_end]).into_owned();
4737
4738 matches.push(SearchMatch {
4739 byte_offset: doc_offset + m.start(),
4740 length: m.end() - m.start(),
4741 line: line_at,
4742 column,
4743 context,
4744 });
4745 }
4746 }
4747 matches
4748}
4749
4750#[cfg(test)]
4751mod tests {
4752 use crate::model::filesystem::StdFileSystem;
4753 use std::sync::Arc;
4754
    /// Shared helper: wraps the real on-disk filesystem backend used by the tests below.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }
4758 use super::*;
4759
4760 #[test]
4761 fn test_empty_buffer() {
4762 let buffer = TextBuffer::empty(test_fs());
4763 assert_eq!(buffer.total_bytes(), 0);
4764 assert_eq!(buffer.line_count(), Some(1)); }
4766
4767 #[test]
4768 fn test_line_positions_multiline() {
4769 let buffer = TextBuffer::from_bytes(b"Hello\nNew Line\nWorld!".to_vec(), test_fs());
4770
4771 assert_eq!(buffer.line_count(), Some(3));
4773
4774 assert_eq!(buffer.line_start_offset(0), Some(0)); assert_eq!(buffer.line_start_offset(1), Some(6)); assert_eq!(buffer.line_start_offset(2), Some(15)); assert_eq!(buffer.offset_to_position(0).unwrap().line, 0); assert_eq!(buffer.offset_to_position(5).unwrap().line, 0); assert_eq!(buffer.offset_to_position(6).unwrap().line, 1); assert_eq!(buffer.offset_to_position(14).unwrap().line, 1); assert_eq!(buffer.offset_to_position(15).unwrap().line, 2); assert_eq!(buffer.line_col_to_position(0, 5), 5); assert_eq!(buffer.line_col_to_position(1, 0), 6); assert_eq!(buffer.line_col_to_position(1, 8), 14); assert_eq!(buffer.line_col_to_position(2, 0), 15); }
4792
4793 #[test]
4794 fn test_new_from_content() {
4795 let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4796 assert_eq!(buffer.total_bytes(), 11);
4797 assert_eq!(buffer.line_count(), Some(2));
4798 }
4799
4800 #[test]
4801 fn test_get_all_text() {
4802 let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4803 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld");
4804 }
4805
4806 #[test]
4807 fn test_insert_at_start() {
4808 let mut buffer = TextBuffer::from_bytes(b"world".to_vec(), test_fs());
4809 buffer.insert_bytes(0, b"hello ".to_vec());
4810
4811 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4812 assert_eq!(buffer.total_bytes(), 11);
4813 }
4814
4815 #[test]
4816 fn test_insert_in_middle() {
4817 let mut buffer = TextBuffer::from_bytes(b"helloworld".to_vec(), test_fs());
4818 buffer.insert_bytes(5, b" ".to_vec());
4819
4820 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4821 assert_eq!(buffer.total_bytes(), 11);
4822 }
4823
4824 #[test]
4825 fn test_insert_at_end() {
4826 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4827 buffer.insert_bytes(5, b" world".to_vec());
4828
4829 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
4830 assert_eq!(buffer.total_bytes(), 11);
4831 }
4832
4833 #[test]
4834 fn test_insert_with_newlines() {
4835 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4836 buffer.insert_bytes(5, b"\nworld\ntest".to_vec());
4837
4838 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld\ntest");
4839 assert_eq!(buffer.line_count(), Some(3));
4840 }
4841
4842 #[test]
4843 fn test_delete_from_start() {
4844 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4845 buffer.delete_bytes(0, 6);
4846
4847 assert_eq!(buffer.get_all_text().unwrap(), b"world");
4848 assert_eq!(buffer.total_bytes(), 5);
4849 }
4850
4851 #[test]
4852 fn test_delete_from_middle() {
4853 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4854 buffer.delete_bytes(5, 1);
4855
4856 assert_eq!(buffer.get_all_text().unwrap(), b"helloworld");
4857 assert_eq!(buffer.total_bytes(), 10);
4858 }
4859
4860 #[test]
4861 fn test_delete_from_end() {
4862 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4863 buffer.delete_bytes(6, 5);
4864
4865 assert_eq!(buffer.get_all_text().unwrap(), b"hello ");
4866 assert_eq!(buffer.total_bytes(), 6);
4867 }
4868
4869 #[test]
4870 fn test_delete_with_newlines() {
4871 let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4872 buffer.delete_bytes(5, 7); assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
4875 assert_eq!(buffer.line_count(), Some(1));
4876 }
4877
4878 #[test]
4879 fn test_offset_position_conversions() {
4880 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4881
4882 let pos = buffer.offset_to_position(0);
4883 assert_eq!(pos, Some(Position { line: 0, column: 0 }));
4884
4885 let pos = buffer.offset_to_position(6);
4886 assert_eq!(pos, Some(Position { line: 1, column: 0 }));
4887
4888 let offset = buffer.position_to_offset(Position { line: 1, column: 0 });
4889 assert_eq!(offset, 6);
4890 }
4891
4892 #[test]
4893 fn test_insert_at_position() {
4894 let mut buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
4895 buffer.insert_at_position(Position { line: 1, column: 0 }, b"beautiful ".to_vec());
4896
4897 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nbeautiful world");
4898 }
4899
4900 #[test]
4901 fn test_delete_range() {
4902 let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4903
4904 let start = Position { line: 0, column: 5 };
4905 let end = Position { line: 2, column: 0 };
4906 buffer.delete_range(start, end);
4907
4908 assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
4909 }
4910
4911 #[test]
4912 fn test_get_line() {
4913 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
4914
4915 assert_eq!(buffer.get_line(0), Some(b"hello\n".to_vec()));
4916 assert_eq!(buffer.get_line(1), Some(b"world\n".to_vec()));
4917 assert_eq!(buffer.get_line(2), Some(b"test".to_vec()));
4918 assert_eq!(buffer.get_line(3), None);
4919 }
4920
4921 #[test]
4922 fn test_multiple_operations() {
4923 let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());
4924
4925 buffer.insert_bytes(0, b"start\n".to_vec());
4926 assert_eq!(buffer.line_count(), Some(4));
4927
4928 buffer.delete_bytes(6, 6); assert_eq!(buffer.line_count(), Some(3));
4930
4931 buffer.insert_bytes(6, b"new\n".to_vec());
4932 assert_eq!(buffer.line_count(), Some(4));
4933
4934 let text = buffer.get_all_text().unwrap();
4935 assert_eq!(text, b"start\nnew\nline2\nline3");
4936 }
4937
4938 #[test]
4939 fn test_get_text_range() {
4940 let buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
4941
4942 assert_eq!(buffer.get_text_range(0, 5), Some(b"hello".to_vec()));
4943 assert_eq!(buffer.get_text_range(6, 5), Some(b"world".to_vec()));
4944 assert_eq!(buffer.get_text_range(0, 11), Some(b"hello world".to_vec()));
4945 }
4946
4947 #[test]
4948 fn test_empty_operations() {
4949 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
4950
4951 buffer.insert_bytes(2, Vec::new());
4952 assert_eq!(buffer.get_all_text().unwrap(), b"hello");
4953
4954 buffer.delete_bytes(2, 0);
4955 assert_eq!(buffer.get_all_text().unwrap(), b"hello");
4956 }
4957
4958 #[test]
4959 fn test_sequential_inserts_at_beginning() {
4960 let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
4962
4963 buffer.delete_bytes(0, 12);
4965 assert_eq!(buffer.get_all_text().unwrap(), b"");
4966
4967 buffer.insert_bytes(0, vec![b'a']);
4969 assert_eq!(buffer.get_all_text().unwrap(), b"a");
4970
4971 buffer.insert_bytes(0, vec![b'b']);
4973 assert_eq!(buffer.get_all_text().unwrap(), b"ba");
4974 }
4975
4976 mod large_file_support {
4979 use super::*;
4980 use crate::model::piece_tree::StringBuffer;
4981 use std::fs::File;
4982 use std::io::Write;
4983 use tempfile::TempDir;
4984
4985 #[test]
4988 fn test_line_feed_count_is_some_for_loaded_buffer() {
4989 let buffer = StringBuffer::new(0, b"hello\nworld\ntest".to_vec());
4990 assert_eq!(buffer.line_feed_count(), Some(2));
4991 }
4992
4993 #[test]
4994 fn test_line_feed_count_is_none_for_unloaded_buffer() {
4995 let temp_dir = TempDir::new().unwrap();
4996 let file_path = temp_dir.path().join("test.txt");
4997
4998 let buffer = StringBuffer::new_unloaded(0, file_path, 0, 100);
4999 assert_eq!(buffer.line_feed_count(), None);
5000 }
5001
5002 #[test]
5003 fn test_line_count_is_some_for_small_buffer() {
5004 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
5005 assert_eq!(buffer.line_count(), Some(3));
5006 }
5007
5008 #[test]
5009 fn test_piece_tree_works_with_none_line_count() {
5010 let buffer = StringBuffer::new_loaded(0, b"hello\nworld".to_vec(), false);
5012 assert_eq!(buffer.line_feed_count(), None);
5013
5014 use crate::model::piece_tree::{BufferLocation, PieceTree};
5016 let tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, None);
5017
5018 assert_eq!(tree.line_count(), None);
5020 }
5021
5022 #[test]
5025 fn test_buffer_data_loaded_variant() {
5026 let data = b"hello world".to_vec();
5027 let buffer = StringBuffer::new_loaded(0, data.clone(), true);
5028
5029 assert!(buffer.is_loaded());
5030 assert_eq!(buffer.get_data(), Some(&data[..]));
5031 assert!(buffer.get_line_starts().is_some());
5032 }
5033
5034 #[test]
5035 fn test_buffer_data_loaded_without_line_starts() {
5036 let data = b"hello\nworld".to_vec();
5037 let buffer = StringBuffer::new_loaded(0, data.clone(), false);
5038
5039 assert!(buffer.is_loaded());
5040 assert_eq!(buffer.get_data(), Some(&data[..]));
5041 assert_eq!(buffer.get_line_starts(), None); }
5043
5044 #[test]
5045 fn test_buffer_data_unloaded_variant() {
5046 let temp_dir = TempDir::new().unwrap();
5047 let file_path = temp_dir.path().join("test.txt");
5048
5049 let buffer = StringBuffer::new_unloaded(0, file_path.clone(), 0, 100);
5050
5051 assert!(!buffer.is_loaded());
5052 assert_eq!(buffer.get_data(), None);
5053 assert_eq!(buffer.get_line_starts(), None);
5054 }
5055
5056 #[test]
5057 fn test_buffer_load_method() {
5058 let temp_dir = TempDir::new().unwrap();
5059 let file_path = temp_dir.path().join("test.txt");
5060
5061 let test_data = b"hello world";
5063 File::create(&file_path)
5064 .unwrap()
5065 .write_all(test_data)
5066 .unwrap();
5067
5068 let mut buffer = StringBuffer::new_unloaded(0, file_path, 0, test_data.len());
5070 assert!(!buffer.is_loaded());
5071
5072 let fs = crate::model::filesystem::StdFileSystem;
5074 buffer.load(&fs).unwrap();
5075
5076 assert!(buffer.is_loaded());
5078 assert_eq!(buffer.get_data(), Some(&test_data[..]));
5079 }
5080
5081 #[test]
5082 fn test_string_buffer_new_vs_new_loaded() {
5083 let data = b"hello\nworld".to_vec();
5084
5085 let buf1 = StringBuffer::new(0, data.clone());
5087 assert!(buf1.is_loaded());
5088 assert!(buf1.get_line_starts().is_some());
5089 assert_eq!(buf1.line_feed_count(), Some(1));
5090
5091 let buf2 = StringBuffer::new_loaded(0, data.clone(), false);
5093 assert!(buf2.is_loaded());
5094 assert_eq!(buf2.get_line_starts(), None);
5095 assert_eq!(buf2.line_feed_count(), None);
5096 }
5097
5098 #[test]
5101 fn test_load_small_file_eager_loading() {
5102 let temp_dir = TempDir::new().unwrap();
5103 let file_path = temp_dir.path().join("small.txt");
5104
5105 let test_data = b"hello\ntest";
5107 File::create(&file_path)
5108 .unwrap()
5109 .write_all(test_data)
5110 .unwrap();
5111
5112 let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
5114
5115 assert!(!buffer.large_file);
5117 assert_eq!(buffer.total_bytes(), test_data.len());
5118 assert_eq!(buffer.line_count(), Some(2)); assert_eq!(buffer.get_all_text().unwrap(), test_data);
5120
5121 assert!(buffer.buffers[0].is_loaded());
5123 }
5124
5125 #[test]
5126 fn test_load_large_file_lazy_loading() {
5127 let temp_dir = TempDir::new().unwrap();
5128 let file_path = temp_dir.path().join("large.txt");
5129
5130 let test_data = b"hello\nworld\ntest";
5132 File::create(&file_path)
5133 .unwrap()
5134 .write_all(test_data)
5135 .unwrap();
5136
5137 let buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
5139
5140 assert!(buffer.large_file);
5142 assert_eq!(buffer.total_bytes(), test_data.len());
5143
5144 assert_eq!(buffer.line_count(), None);
5146
5147 assert!(!buffer.buffers[0].is_loaded());
5149 assert_eq!(buffer.buffers[0].get_data(), None);
5150 }
5151
5152 #[test]
5160 fn test_issue_657_search_on_large_file_unloaded_buffer() {
5161 let temp_dir = TempDir::new().unwrap();
5162 let file_path = temp_dir.path().join("large_search_test.txt");
5163
5164 let test_data = b"line1\nline2\nSEARCH_TARGET\nline4\nline5";
5166 File::create(&file_path)
5167 .unwrap()
5168 .write_all(test_data)
5169 .unwrap();
5170
5171 let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
5173
5174 assert!(buffer.large_file, "Buffer should be in large file mode");
5176 assert!(
5177 !buffer.buffers[0].is_loaded(),
5178 "Buffer should be unloaded initially"
5179 );
5180
5181 assert!(
5184 buffer.to_string().is_none(),
5185 "BUG REPRODUCED: to_string() returns None for unloaded buffer"
5186 );
5187
5188 let total_bytes = buffer.len();
5190 let content = buffer.get_text_range_mut(0, total_bytes).unwrap();
5191 let content_str = String::from_utf8_lossy(&content);
5192
5193 assert!(
5195 content_str.contains("SEARCH_TARGET"),
5196 "FIX WORKS: get_text_range_mut() loaded the buffer and found the search target"
5197 );
5198
5199 assert!(
5201 buffer.to_string().is_some(),
5202 "After get_text_range_mut(), to_string() should work"
5203 );
5204 }
5205
5206 #[test]
5207 fn test_large_file_threshold_boundary() {
5208 let temp_dir = TempDir::new().unwrap();
5209
5210 let file_path = temp_dir.path().join("at_threshold.txt");
5212 let test_data = vec![b'x'; 100];
5213 File::create(&file_path)
5214 .unwrap()
5215 .write_all(&test_data)
5216 .unwrap();
5217
5218 let buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
5220 assert!(buffer.large_file);
5221
5222 let file_path2 = temp_dir.path().join("below_threshold.txt");
5224 let test_data2 = vec![b'x'; 99];
5225 File::create(&file_path2)
5226 .unwrap()
5227 .write_all(&test_data2)
5228 .unwrap();
5229
5230 let buffer2 = TextBuffer::load_from_file(&file_path2, 100, test_fs()).unwrap();
5232 assert!(!buffer2.large_file);
5233 }
5234
5235 #[test]
5236 fn test_large_file_default_threshold() {
5237 let temp_dir = TempDir::new().unwrap();
5238 let file_path = temp_dir.path().join("test.txt");
5239
5240 File::create(&file_path)
5242 .unwrap()
5243 .write_all(b"hello")
5244 .unwrap();
5245
5246 let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
5248
5249 assert!(!buffer.large_file);
5251 }
5252
5253 #[test]
5254 fn test_large_file_has_correct_piece_tree_structure() {
5255 let temp_dir = TempDir::new().unwrap();
5256 let file_path = temp_dir.path().join("large.txt");
5257
5258 let test_data = b"hello world";
5259 File::create(&file_path)
5260 .unwrap()
5261 .write_all(test_data)
5262 .unwrap();
5263
5264 let buffer = TextBuffer::load_from_file(&file_path, 5, test_fs()).unwrap();
5266
5267 assert_eq!(buffer.total_bytes(), test_data.len());
5269
5270 assert_eq!(buffer.buffers.len(), 1);
5272
5273 assert!(!buffer.buffers[0].is_loaded());
5275 }
5276
5277 #[test]
5278 fn test_empty_large_file() {
5279 let temp_dir = TempDir::new().unwrap();
5280 let file_path = temp_dir.path().join("empty.txt");
5281
5282 File::create(&file_path).unwrap();
5284
5285 let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();
5287
5288 assert_eq!(buffer.total_bytes(), 0);
5290 assert!(buffer.is_empty());
5291 }
5292
5293 #[test]
5294 fn test_large_file_basic_api_operations() {
5295 let temp_dir = TempDir::new().unwrap();
5296 let file_path = temp_dir.path().join("large_test.txt");
5297
5298 let test_data = b"line1\nline2\nline3\nline4\n";
5300 File::create(&file_path)
5301 .unwrap()
5302 .write_all(test_data)
5303 .unwrap();
5304
5305 let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();
5307
5308 assert!(buffer.large_file);
5310 assert_eq!(buffer.line_count(), None); assert_eq!(buffer.total_bytes(), test_data.len());
5314 assert!(!buffer.is_empty());
5315 assert_eq!(buffer.len(), test_data.len());
5316
5317 let range_result = buffer.get_text_range_mut(0, 5).unwrap();
5319 assert_eq!(range_result, b"line1");
5320
5321 let range_result2 = buffer.get_text_range_mut(6, 5).unwrap();
5322 assert_eq!(range_result2, b"line2");
5323
5324 let all_text = buffer.get_all_text().unwrap();
5326 assert_eq!(all_text, test_data);
5327
5328 assert_eq!(buffer.slice_bytes(0..5), b"line1");
5330
5331 buffer.insert_bytes(0, b"prefix_".to_vec());
5334 assert_eq!(buffer.total_bytes(), test_data.len() + 7);
5335 assert!(buffer.is_modified());
5336
5337 let text_after_insert = buffer.get_all_text().unwrap();
5339 assert_eq!(&text_after_insert[0..7], b"prefix_");
5340 assert_eq!(&text_after_insert[7..12], b"line1");
5341
5342 buffer.delete_bytes(0, 7);
5344 assert_eq!(buffer.total_bytes(), test_data.len());
5345
5346 let text_after_delete = buffer.get_all_text().unwrap();
5348 assert_eq!(text_after_delete, test_data);
5349
5350 let end_offset = buffer.total_bytes();
5352 buffer.insert_bytes(end_offset, b"suffix".to_vec());
5353 assert_eq!(buffer.total_bytes(), test_data.len() + 6);
5354
5355 let final_text = buffer.get_all_text().unwrap();
5357 assert!(final_text.ends_with(b"suffix"));
5358 assert_eq!(&final_text[0..test_data.len()], test_data);
5359
5360 let pos = buffer.offset_to_position(0).unwrap();
5364 assert_eq!(pos.column, 0);
5365
5366 let offset = buffer.position_to_offset(Position { line: 0, column: 0 });
5368 assert_eq!(offset, 0);
5369
5370 let replace_result = buffer.replace_range(0..5, "START");
5372 assert!(replace_result);
5373
5374 let text_after_replace = buffer.get_all_text().unwrap();
5375 assert!(text_after_replace.starts_with(b"START"));
5376 }
5377
5378 #[test]
5379 fn test_large_file_chunk_based_loading() {
5380 let temp_dir = TempDir::new().unwrap();
5381 let file_path = temp_dir.path().join("huge.txt");
5382
5383 let chunk_size = LOAD_CHUNK_SIZE; let file_size = chunk_size * 3; let mut file = File::create(&file_path).unwrap();
5390 file.write_all(&vec![b'A'; chunk_size]).unwrap();
5391 file.write_all(&vec![b'B'; chunk_size]).unwrap();
5392 file.write_all(&vec![b'C'; chunk_size]).unwrap();
5393 file.flush().unwrap();
5394
5395 let mut buffer = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();
5397
5398 assert!(buffer.large_file);
5400 assert_eq!(buffer.total_bytes(), file_size);
5401
5402 assert!(!buffer.buffers[0].is_loaded());
5404
5405 let first_chunk_data = buffer.get_text_range_mut(0, 1024).unwrap();
5407 assert_eq!(first_chunk_data.len(), 1024);
5408 assert!(first_chunk_data.iter().all(|&b| b == b'A'));
5409
5410 let second_chunk_data = buffer.get_text_range_mut(chunk_size, 1024).unwrap();
5412 assert_eq!(second_chunk_data.len(), 1024);
5413 assert!(second_chunk_data.iter().all(|&b| b == b'B'));
5414
5415 let third_chunk_data = buffer.get_text_range_mut(chunk_size * 2, 1024).unwrap();
5417 assert_eq!(third_chunk_data.len(), 1024);
5418 assert!(third_chunk_data.iter().all(|&b| b == b'C'));
5419
5420 let cross_chunk_offset = chunk_size - 512;
5423 let cross_chunk_data = buffer.get_text_range_mut(cross_chunk_offset, 1024).unwrap();
5424 assert_eq!(cross_chunk_data.len(), 1024);
5425 assert!(cross_chunk_data[..512].iter().all(|&b| b == b'A'));
5427 assert!(cross_chunk_data[512..].iter().all(|&b| b == b'B'));
5428
5429 assert!(
5432 buffer.buffers.len() > 1,
5433 "Expected multiple buffers after chunk-based loading, got {}",
5434 buffer.buffers.len()
5435 );
5436
5437 buffer.insert_bytes(0, b"PREFIX".to_vec());
5439 assert_eq!(buffer.total_bytes(), file_size + 6);
5440
5441 let after_insert = buffer.get_text_range_mut(0, 6).unwrap();
5442 assert_eq!(after_insert, b"PREFIX");
5443
5444 let after_prefix = buffer.get_text_range_mut(6, 10).unwrap();
5446 assert!(after_prefix.iter().all(|&b| b == b'A'));
5447
5448 let mut buffer2 = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();
5451
5452 let chunk_read_size = 64 * 1024; let mut offset = 0;
5455 while offset < file_size {
5456 let bytes_to_read = chunk_read_size.min(file_size - offset);
5457 let chunk_data = buffer2.get_text_range_mut(offset, bytes_to_read).unwrap();
5458
5459 let first_mb_end = chunk_size;
5461 let second_mb_end = chunk_size * 2;
5462
5463 for (i, &byte) in chunk_data.iter().enumerate() {
5465 let file_offset = offset + i;
5466 let expected = if file_offset < first_mb_end {
5467 b'A'
5468 } else if file_offset < second_mb_end {
5469 b'B'
5470 } else {
5471 b'C'
5472 };
5473 assert_eq!(
5474 byte, expected,
5475 "Mismatch at file offset {}: expected {}, got {}",
5476 file_offset, expected as char, byte as char
5477 );
5478 }
5479
5480 offset += bytes_to_read;
5481 }
5482 }
5483
5484 #[test]
5488 fn test_large_file_incremental_save() {
5489 let temp_dir = TempDir::new().unwrap();
5490 let file_path = temp_dir.path().join("large_save_test.txt");
5491
5492 let chunk_size = 1000; let file_size = chunk_size * 2; let mut file = File::create(&file_path).unwrap();
5497 file.write_all(&vec![b'A'; chunk_size]).unwrap();
5499 file.write_all(&vec![b'B'; chunk_size]).unwrap();
5501 file.flush().unwrap();
5502
5503 let mut buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
5505 assert!(buffer.large_file);
5506 assert_eq!(buffer.total_bytes(), file_size);
5507
5508 let first_bytes = buffer.get_text_range_mut(0, 50).unwrap();
5510 assert!(first_bytes.iter().all(|&b| b == b'A'));
5511
5512 buffer.insert_bytes(0, b"PREFIX_".to_vec());
5514
5515 let save_path = temp_dir.path().join("saved.txt");
5517 buffer.save_to_file(&save_path).unwrap();
5518
5519 let saved_content = std::fs::read(&save_path).unwrap();
5521
5522 assert_eq!(
5524 saved_content.len(),
5525 file_size + 7,
5526 "Saved file should be {} bytes, got {}",
5527 file_size + 7,
5528 saved_content.len()
5529 );
5530
5531 assert_eq!(&saved_content[..7], b"PREFIX_", "Should start with PREFIX_");
5533
5534 assert!(
5536 saved_content[7..100].iter().all(|&b| b == b'A'),
5537 "First chunk after prefix should be A's"
5538 );
5539
5540 let second_chunk_start = 7 + chunk_size;
5542 assert!(
5543 saved_content[second_chunk_start..second_chunk_start + 100]
5544 .iter()
5545 .all(|&b| b == b'B'),
5546 "Second chunk should be B's (was unloaded, should be preserved)"
5547 );
5548 }
5549
5550 #[test]
5552 fn test_large_file_save_with_multiple_edits() {
5553 let temp_dir = TempDir::new().unwrap();
5554 let file_path = temp_dir.path().join("multi_edit.txt");
5555
5556 let mut content = Vec::new();
5558 for i in 0..100 {
5559 content.extend_from_slice(
5560 format!("Line {:04}: padding to make it longer\n", i).as_bytes(),
5561 );
5562 }
5563 let original_len = content.len();
5564 std::fs::write(&file_path, &content).unwrap();
5565
5566 let mut buffer = TextBuffer::load_from_file(&file_path, 500, test_fs()).unwrap();
5568 assert!(
5569 buffer.line_count().is_none(),
5570 "Should be in large file mode"
5571 );
5572
5573 buffer.insert_bytes(0, b"[START]".to_vec());
5575
5576 let mid_offset = original_len / 2;
5578 let _mid_bytes = buffer.get_text_range_mut(mid_offset + 7, 10).unwrap(); buffer.insert_bytes(mid_offset + 7, b"[MIDDLE]".to_vec());
5580
5581 let save_path = temp_dir.path().join("multi_edit_saved.txt");
5583 buffer.save_to_file(&save_path).unwrap();
5584
5585 let saved = std::fs::read_to_string(&save_path).unwrap();
5587
5588 assert!(
5589 saved.starts_with("[START]Line 0000"),
5590 "Should start with our edit"
5591 );
5592 assert!(saved.contains("[MIDDLE]"), "Should contain middle edit");
5593 assert!(saved.contains("Line 0099"), "Should preserve end of file");
5594
5595 let expected_len = original_len + 7 + 8; assert_eq!(
5598 saved.len(),
5599 expected_len,
5600 "Length should be original + edits"
5601 );
5602 }
5603 }
5604
5605 #[test]
5609 fn test_offset_to_position_simple() {
5610 let content = b"a\nb\nc\nd";
5616 let buffer = TextBuffer::from_bytes(content.to_vec(), test_fs());
5617
5618 let pos = buffer
5620 .offset_to_position(0)
5621 .expect("small buffer should have line metadata");
5622 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5623 assert_eq!(pos.column, 0);
5624
5625 let pos = buffer
5626 .offset_to_position(1)
5627 .expect("small buffer should have line metadata");
5628 assert_eq!(pos.line, 0, "Byte 1 (newline) should be on line 0");
5629 assert_eq!(pos.column, 1);
5630
5631 let pos = buffer
5632 .offset_to_position(2)
5633 .expect("small buffer should have line metadata");
5634 assert_eq!(pos.line, 1, "Byte 2 should be on line 1");
5635 assert_eq!(pos.column, 0);
5636
5637 let pos = buffer
5638 .offset_to_position(3)
5639 .expect("small buffer should have line metadata");
5640 assert_eq!(pos.line, 1, "Byte 3 (newline) should be on line 1");
5641 assert_eq!(pos.column, 1);
5642
5643 let pos = buffer
5644 .offset_to_position(4)
5645 .expect("small buffer should have line metadata");
5646 assert_eq!(pos.line, 2, "Byte 4 should be on line 2");
5647 assert_eq!(pos.column, 0);
5648
5649 let pos = buffer
5650 .offset_to_position(6)
5651 .expect("small buffer should have line metadata");
5652 assert_eq!(pos.line, 3, "Byte 6 should be on line 3");
5653 assert_eq!(pos.column, 0);
5654 }
5655
5656 #[test]
5657 fn test_offset_to_position_after_insert() {
5658 let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
5660
5661 buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
5663
5664 let pos = buffer
5670 .offset_to_position(0)
5671 .expect("small buffer should have line metadata");
5672 assert_eq!(pos.line, 0, "Byte 0 should still be on line 0");
5673
5674 let pos = buffer
5675 .offset_to_position(2)
5676 .expect("small buffer should have line metadata");
5677 assert_eq!(
5678 pos.line, 1,
5679 "Byte 2 (start of inserted line) should be on line 1"
5680 );
5681
5682 let pos = buffer
5683 .offset_to_position(4)
5684 .expect("small buffer should have line metadata");
5685 assert_eq!(
5686 pos.line, 2,
5687 "Byte 4 (start of 'b') should be on line 2 after insert"
5688 );
5689 }
5690
5691 #[test]
5692 fn test_offset_to_position_empty_lines() {
5693 let buffer = TextBuffer::from_bytes(b"\n\n\n".to_vec(), test_fs());
5695
5696 let pos = buffer
5702 .offset_to_position(0)
5703 .expect("small buffer should have line metadata");
5704 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5705
5706 let pos = buffer
5707 .offset_to_position(1)
5708 .expect("small buffer should have line metadata");
5709 assert_eq!(pos.line, 1, "Byte 1 should be on line 1");
5710
5711 let pos = buffer
5712 .offset_to_position(2)
5713 .expect("small buffer should have line metadata");
5714 assert_eq!(pos.line, 2, "Byte 2 should be on line 2");
5715
5716 let pos = buffer
5717 .offset_to_position(3)
5718 .expect("small buffer should have line metadata");
5719 assert_eq!(pos.line, 3, "Byte 3 (EOF) should be on line 3");
5720 }
5721
5722 #[test]
5723 fn test_offset_to_position_long_lines() {
5724 let mut content = Vec::new();
5726 content.extend_from_slice(b"aaaaaaaaaa\n"); content.extend_from_slice(b"bbbbbbbbbb\n"); content.extend_from_slice(b"cccccccccc"); let buffer = TextBuffer::from_bytes(content.clone(), test_fs());
5731
5732 let pos = buffer
5734 .offset_to_position(0)
5735 .expect("small buffer should have line metadata");
5736 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
5737 assert_eq!(pos.column, 0);
5738
5739 let pos = buffer
5740 .offset_to_position(11)
5741 .expect("small buffer should have line metadata");
5742 assert_eq!(pos.line, 1, "Byte 11 (start of line 1) should be on line 1");
5743 assert_eq!(pos.column, 0);
5744
5745 let pos = buffer
5746 .offset_to_position(22)
5747 .expect("small buffer should have line metadata");
5748 assert_eq!(pos.line, 2, "Byte 22 (start of line 2) should be on line 2");
5749 assert_eq!(pos.column, 0);
5750
5751 let pos = buffer
5753 .offset_to_position(5)
5754 .expect("small buffer should have line metadata");
5755 assert_eq!(pos.line, 0, "Byte 5 should be on line 0");
5756 assert_eq!(pos.column, 5);
5757
5758 let pos = buffer
5759 .offset_to_position(16)
5760 .expect("small buffer should have line metadata");
5761 assert_eq!(pos.line, 1, "Byte 16 should be on line 1");
5762 assert_eq!(pos.column, 5);
5763 }
5764
5765 #[test]
5766 fn test_line_iterator_with_offset_to_position() {
5767 let mut buffer = TextBuffer::from_bytes(b"line0\nline1\nline2\n".to_vec(), test_fs());
5769
5770 for byte_pos in 0..=buffer.len() {
5772 let iter = buffer.line_iterator(byte_pos, 80);
5773 let iter_pos = iter.current_position();
5774 let expected_line = buffer
5775 .offset_to_position(byte_pos)
5776 .expect("small buffer should have line metadata")
5777 .line;
5778 let expected_line_start = buffer.position_to_offset(Position {
5779 line: expected_line,
5780 column: 0,
5781 });
5782
5783 assert_eq!(
5784 iter_pos, expected_line_start,
5785 "LineIterator at byte {} should position at line start {} but got {}",
5786 byte_pos, expected_line_start, iter_pos
5787 );
5788 }
5789 }
5790
5791 #[test]
5792 fn test_piece_tree_line_count_after_insert() {
5793 let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
5795
5796 buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
5798
5799 let content = buffer.slice_bytes(0..buffer.len());
5801 let newline_count = content.iter().filter(|&&b| b == b'\n').count();
5802 let expected_line_count = newline_count + 1;
5803 let actual_line_count = buffer.line_count();
5804
5805 assert_eq!(
5806 actual_line_count,
5807 Some(expected_line_count),
5808 "Line count mismatch after insert"
5809 );
5810 }
5811
    #[test]
    fn test_position_to_lsp_position_after_modification() {
        // Simulates an LSP rename: `val` -> `value` in both the body and the
        // parameter list, then checks byte-offset -> (line, char) mapping
        // still agrees with the edited content.
        let initial = b"fn foo(val: i32) {\n val + 1\n}\n";
        let mut buffer = TextBuffer::from_bytes(initial.to_vec(), test_fs());

        // Offset 23 lands on the `val` inside the function body.
        let (line, char) = buffer.position_to_lsp_position(23);
        assert_eq!(line, 1, "Initial: position 23 should be on line 1");
        assert_eq!(char, 4, "Initial: position 23 should be at char 4");

        // Replace the body's `val` (line 1, columns 4..7) with "value".
        buffer.delete_range(
            Position { line: 1, column: 4 },
            Position { line: 1, column: 7 },
        );
        buffer.insert_bytes(23, b"value".to_vec()); // Replace the parameter's `val` (line 0, columns 7..10) with "value".
        buffer.delete_range(
            Position { line: 0, column: 7 },
            Position {
                line: 0,
                column: 10,
            },
        );
        buffer.insert_bytes(7, b"value".to_vec()); let content = String::from_utf8_lossy(&buffer.get_all_text().unwrap()).to_string();
        assert_eq!(content, "fn foo(value: i32) {\n value + 1\n}\n");

        // After both renames, offset 25 is the start of `value` in the body.
        let (line, char) = buffer.position_to_lsp_position(25);
        assert_eq!(
            line, 1,
            "After modification: position 25 should be on line 1"
        );
        assert_eq!(
            char, 4,
            "After modification: position 25 should be at char 4"
        );

        // Offset 21 is the first byte of line 1 in the edited content.
        let (line, char) = buffer.position_to_lsp_position(21);
        assert_eq!(line, 1, "Position 21 should be on line 1");
        assert_eq!(char, 0, "Position 21 should be at char 0 (start of line)");
    }
5873
5874 #[test]
5875 fn test_detect_crlf() {
5876 assert_eq!(
5877 TextBuffer::detect_line_ending(b"hello\r\nworld\r\n"),
5878 LineEnding::CRLF
5879 );
5880 }
5881
5882 #[test]
5883 fn test_detect_lf() {
5884 assert_eq!(
5885 TextBuffer::detect_line_ending(b"hello\nworld\n"),
5886 LineEnding::LF
5887 );
5888 }
5889
5890 #[test]
5891 fn test_normalize_crlf() {
5892 let input = b"hello\r\nworld\r\n".to_vec();
5893 let output = TextBuffer::normalize_line_endings(input);
5894 assert_eq!(output, b"hello\nworld\n");
5895 }
5896
5897 #[test]
5898 fn test_normalize_empty() {
5899 let input = Vec::new();
5900 let output = TextBuffer::normalize_line_endings(input);
5901 assert_eq!(output, Vec::<u8>::new());
5902 }
5903
5904 #[test]
5911 fn test_get_all_text_returns_empty_for_unloaded_buffers() {
5912 use tempfile::TempDir;
5913 let temp_dir = TempDir::new().unwrap();
5914 let file_path = temp_dir.path().join("large_test.txt");
5915
5916 let original_content = "X".repeat(50_000);
5918 std::fs::write(&file_path, &original_content).unwrap();
5919
5920 let mut buffer = TextBuffer::load_from_file(&file_path, 1024, test_fs()).unwrap();
5922 assert!(buffer.large_file, "Should be in large file mode");
5923 assert!(!buffer.buffers[0].is_loaded(), "Buffer should be unloaded");
5924
5925 buffer.insert_bytes(0, b"EDITED: ".to_vec());
5927
5928 let content_immutable = buffer.get_all_text();
5931
5932 assert!(
5935 content_immutable.is_none(),
5936 "get_all_text() should return None for large files with unloaded regions. \
5937 Got Some({} bytes) instead of None.",
5938 content_immutable.as_ref().map(|c| c.len()).unwrap_or(0)
5939 );
5940
5941 let total = buffer.total_bytes();
5943 let content_lazy = buffer.get_text_range_mut(0, total).unwrap();
5944 assert_eq!(
5945 content_lazy.len(),
5946 50_000 + 8,
5947 "get_text_range_mut() should return all content with lazy loading"
5948 );
5949 assert!(
5950 String::from_utf8_lossy(&content_lazy).starts_with("EDITED: "),
5951 "Content should start with our edit"
5952 );
5953 }
5954
5955 mod line_ending_conversion {
5958 use super::*;
5959
5960 #[test]
5961 fn test_convert_lf_to_crlf() {
5962 let input = b"Line 1\nLine 2\nLine 3\n";
5963 let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
5964 assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
5965 }
5966
5967 #[test]
5968 fn test_convert_crlf_to_lf() {
5969 let input = b"Line 1\r\nLine 2\r\nLine 3\r\n";
5970 let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
5971 assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
5972 }
5973
5974 #[test]
5975 fn test_convert_cr_to_lf() {
5976 let input = b"Line 1\rLine 2\rLine 3\r";
5977 let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
5978 assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
5979 }
5980
5981 #[test]
5982 fn test_convert_mixed_to_crlf() {
5983 let input = b"Line 1\nLine 2\r\nLine 3\r";
5985 let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
5986 assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
5987 }
5988
5989 #[test]
5990 fn test_convert_lf_to_lf_is_noop() {
5991 let input = b"Line 1\nLine 2\nLine 3\n";
5992 let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
5993 assert_eq!(result, input.to_vec());
5994 }
5995
5996 #[test]
5997 fn test_convert_empty_content() {
5998 let input = b"";
5999 let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
6000 assert_eq!(result, b"".to_vec());
6001 }
6002
6003 #[test]
6004 fn test_convert_no_line_endings() {
6005 let input = b"No line endings here";
6006 let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
6007 assert_eq!(result, b"No line endings here".to_vec());
6008 }
6009
6010 #[test]
6011 fn test_set_line_ending_marks_modified() {
6012 let mut buffer = TextBuffer::from_bytes(b"Hello\nWorld\n".to_vec(), test_fs());
6013 assert!(!buffer.is_modified());
6014
6015 buffer.set_line_ending(LineEnding::CRLF);
6016 assert!(buffer.is_modified());
6017 }
6018
6019 #[test]
6020 fn test_set_default_line_ending_does_not_mark_modified() {
6021 let mut buffer = TextBuffer::empty(test_fs());
6022 assert!(!buffer.is_modified());
6023
6024 buffer.set_default_line_ending(LineEnding::CRLF);
6025 assert!(!buffer.is_modified());
6026 assert_eq!(buffer.line_ending(), LineEnding::CRLF);
6027 }
6028
6029 #[test]
6030 fn test_save_to_file_converts_lf_to_crlf() {
6031 use tempfile::TempDir;
6032
6033 let temp_dir = TempDir::new().unwrap();
6034 let file_path = temp_dir.path().join("test_lf_to_crlf.txt");
6035
6036 let original_content = b"Line 1\nLine 2\nLine 3\n";
6038 std::fs::write(&file_path, original_content).unwrap();
6039
6040 let mut buffer =
6042 TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
6043 .unwrap();
6044 assert_eq!(buffer.line_ending(), LineEnding::LF);
6045
6046 buffer.set_line_ending(LineEnding::CRLF);
6048 assert_eq!(buffer.line_ending(), LineEnding::CRLF);
6049 assert!(buffer.is_modified());
6050
6051 buffer.save_to_file(&file_path).unwrap();
6053
6054 let saved_bytes = std::fs::read(&file_path).unwrap();
6056 assert_eq!(&saved_bytes, b"Line 1\r\nLine 2\r\nLine 3\r\n");
6057 }
6058
6059 #[test]
6060 fn test_save_to_file_converts_crlf_to_lf() {
6061 use tempfile::TempDir;
6062
6063 let temp_dir = TempDir::new().unwrap();
6064 let file_path = temp_dir.path().join("test_crlf_to_lf.txt");
6065
6066 let original_content = b"Line 1\r\nLine 2\r\nLine 3\r\n";
6068 std::fs::write(&file_path, original_content).unwrap();
6069
6070 let mut buffer =
6072 TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
6073 .unwrap();
6074 assert_eq!(buffer.line_ending(), LineEnding::CRLF);
6075
6076 buffer.set_line_ending(LineEnding::LF);
6078 assert_eq!(buffer.line_ending(), LineEnding::LF);
6079 assert!(buffer.is_modified());
6080
6081 buffer.save_to_file(&file_path).unwrap();
6083
6084 let saved_bytes = std::fs::read(&file_path).unwrap();
6086 assert_eq!(&saved_bytes, b"Line 1\nLine 2\nLine 3\n");
6087 }
6088
6089 #[test]
6090 #[cfg(unix)]
6091 fn test_save_to_unwritable_file() -> anyhow::Result<()> {
6092 if unsafe { libc::getuid() } == 0 {
6095 eprintln!("Skipping test: root bypasses file permission checks");
6096 return Ok(());
6097 }
6098 use std::fs::Permissions;
6099 use std::os::unix::fs::PermissionsExt;
6100 use tempfile::TempDir;
6101
6102 let temp_dir = TempDir::new().unwrap();
6103 let unwritable_dir = temp_dir.path().join("unwritable_dir");
6104 std::fs::create_dir(&unwritable_dir)?;
6105
6106 let file_path = unwritable_dir.join("unwritable.txt");
6107 std::fs::write(&file_path, "original content")?;
6108
6109 std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
6111
6112 let mut buffer = TextBuffer::from_bytes(b"new content".to_vec(), test_fs());
6113 let result = buffer.save_to_file(&file_path);
6114
6115 match result {
6117 Err(e) => {
6118 if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
6119 assert_eq!(sudo_err.dest_path, file_path);
6120 assert!(sudo_err.temp_path.exists());
6121 drop(std::fs::remove_file(&sudo_err.temp_path));
6123 } else {
6124 panic!("Expected SudoSaveRequired error, got: {:?}", e);
6125 }
6126 }
6127 Ok(_) => panic!("Expected error, but save succeeded"),
6128 }
6129
6130 Ok(())
6131 }
6132
6133 #[test]
6134 #[cfg(unix)]
6135 fn test_save_to_unwritable_directory() -> anyhow::Result<()> {
6136 if unsafe { libc::getuid() } == 0 {
6139 eprintln!("Skipping test: root bypasses file permission checks");
6140 return Ok(());
6141 }
6142 use std::fs::Permissions;
6143 use std::os::unix::fs::PermissionsExt;
6144 use tempfile::TempDir;
6145
6146 let temp_dir = TempDir::new().unwrap();
6147 let unwritable_dir = temp_dir.path().join("unwritable_dir");
6148 std::fs::create_dir(&unwritable_dir)?;
6149
6150 let file_path = unwritable_dir.join("test.txt");
6151
6152 std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
6154
6155 let mut buffer = TextBuffer::from_bytes(b"content".to_vec(), test_fs());
6156 let result = buffer.save_to_file(&file_path);
6157
6158 match result {
6159 Err(e) => {
6160 if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
6161 assert_eq!(sudo_err.dest_path, file_path);
6162 assert!(sudo_err.temp_path.exists());
6163 assert!(sudo_err.temp_path.starts_with(std::env::temp_dir()));
6165 drop(std::fs::remove_file(&sudo_err.temp_path));
6167 } else {
6168 panic!("Expected SudoSaveRequired error, got: {:?}", e);
6169 }
6170 }
6171 Ok(_) => panic!("Expected error, but save succeeded"),
6172 }
6173
6174 Ok(())
6175 }
6176 }
6177
6178 mod large_file_encoding_tests {
6179 use super::*;
6180
6181 #[test]
6182 fn test_large_file_encoding_confirmation_display() {
6183 let confirmation = LargeFileEncodingConfirmation {
6184 path: PathBuf::from("/test/file.txt"),
6185 file_size: 150 * 1024 * 1024, encoding: Encoding::ShiftJis,
6187 };
6188
6189 let display = format!("{}", confirmation);
6190 assert!(display.contains("150 MB"), "Display: {}", display);
6191 assert!(display.contains("Shift-JIS"), "Display: {}", display);
6192 assert!(
6193 display.contains("requires full load"),
6194 "Display: {}",
6195 display
6196 );
6197 }
6198
6199 #[test]
6200 fn test_large_file_encoding_confirmation_equality() {
6201 let a = LargeFileEncodingConfirmation {
6202 path: PathBuf::from("/test/file.txt"),
6203 file_size: 100 * 1024 * 1024,
6204 encoding: Encoding::Gb18030,
6205 };
6206 let b = LargeFileEncodingConfirmation {
6207 path: PathBuf::from("/test/file.txt"),
6208 file_size: 100 * 1024 * 1024,
6209 encoding: Encoding::Gb18030,
6210 };
6211 let c = LargeFileEncodingConfirmation {
6212 path: PathBuf::from("/test/other.txt"),
6213 file_size: 100 * 1024 * 1024,
6214 encoding: Encoding::Gb18030,
6215 };
6216
6217 assert_eq!(a, b);
6218 assert_ne!(a, c);
6219 }
6220
6221 #[test]
6222 fn test_encoding_requires_confirmation() {
6223 assert!(!Encoding::Utf8.requires_full_file_load());
6225 assert!(!Encoding::Utf8Bom.requires_full_file_load());
6226 assert!(!Encoding::Ascii.requires_full_file_load());
6227 assert!(!Encoding::Latin1.requires_full_file_load());
6228 assert!(!Encoding::Windows1252.requires_full_file_load());
6229 assert!(!Encoding::Utf16Le.requires_full_file_load());
6230 assert!(!Encoding::Utf16Be.requires_full_file_load());
6231
6232 assert!(Encoding::Gb18030.requires_full_file_load());
6234 assert!(Encoding::Gbk.requires_full_file_load());
6235 assert!(Encoding::ShiftJis.requires_full_file_load());
6236 assert!(Encoding::EucKr.requires_full_file_load());
6237 }
6238
6239 #[test]
6240 fn test_check_large_file_encoding_small_file() {
6241 use tempfile::NamedTempFile;
6242
6243 let temp = NamedTempFile::new().unwrap();
6245 std::fs::write(temp.path(), b"hello world").unwrap();
6246
6247 let result = TextBuffer::check_large_file_encoding(temp.path(), test_fs()).unwrap();
6248 assert!(
6249 result.is_none(),
6250 "Small files should not require confirmation"
6251 );
6252 }
6253
6254 #[test]
6255 fn test_large_file_encoding_error_downcast() {
6256 let confirmation = LargeFileEncodingConfirmation {
6258 path: PathBuf::from("/test/file.txt"),
6259 file_size: 200 * 1024 * 1024,
6260 encoding: Encoding::EucKr,
6261 };
6262
6263 let error: anyhow::Error = confirmation.clone().into();
6264 let downcast = error.downcast_ref::<LargeFileEncodingConfirmation>();
6265 assert!(downcast.is_some());
6266 assert_eq!(downcast.unwrap().encoding, Encoding::EucKr);
6267 }
6268 }
6269
6270 mod rebuild_pristine_saved_root_tests {
6271 use super::*;
6272 use crate::model::piece_tree::BufferLocation;
6273 use std::sync::Arc;
6274
        /// Builds a `TextBuffer` in large-file mode over `content`, backed by
        /// a single fully-loaded `Stored` buffer, with `saved_root` sharing
        /// the initial tree root (pristine just-loaded state).
        fn large_file_buffer(content: &[u8]) -> TextBuffer {
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(crate::model::filesystem::StdFileSystem);
            let bytes = content.len();
            // Buffer 0 holds the whole document via `new_loaded`.
            let buffer =
                crate::model::piece_tree::StringBuffer::new_loaded(0, content.to_vec(), false);
            // One piece spanning the whole stored buffer; the trailing `None`
            // leaves the line-feed count deliberately unscanned so tests can
            // exercise the scan + rebuild path.
            let piece_tree = if bytes > 0 {
                crate::model::piece_tree::PieceTree::new(BufferLocation::Stored(0), 0, bytes, None)
            } else {
                crate::model::piece_tree::PieceTree::empty()
            };
            let saved_root = piece_tree.root();
            TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: None,
                modified: false,
                recovery_pending: false,
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(bytes),
                version: 0,
                config: BufferConfig::default(),
            }
        }
6310
6311 fn scan_line_feeds(buf: &mut TextBuffer) -> Vec<(usize, usize)> {
6313 buf.piece_tree.split_leaves_to_chunk_size(LOAD_CHUNK_SIZE);
6314 let leaves = buf.piece_tree.get_leaves();
6315 let mut updates = Vec::new();
6316 for (idx, leaf) in leaves.iter().enumerate() {
6317 if leaf.line_feed_cnt.is_some() {
6318 continue;
6319 }
6320 let count = buf.scan_leaf(leaf).unwrap();
6321 updates.push((idx, count));
6322 }
6323 updates
6324 }
6325
6326 fn make_content(size: usize) -> Vec<u8> {
6328 let line = b"abcdefghij0123456789ABCDEFGHIJ0123456789abcdefghij0123456789ABCDEFGHIJ\n";
6329 let mut out = Vec::with_capacity(size);
6330 while out.len() < size {
6331 let remaining = size - out.len();
6332 let take = remaining.min(line.len());
6333 out.extend_from_slice(&line[..take]);
6334 }
6335 out
6336 }
6337
6338 #[test]
6339 fn test_no_edits_arc_ptr_eq() {
6340 let content = make_content(2 * 1024 * 1024);
6341 let expected_lf = content.iter().filter(|&&b| b == b'\n').count();
6342 let mut buf = large_file_buffer(&content);
6343
6344 assert!(buf.line_count().is_none());
6346
6347 let updates = scan_line_feeds(&mut buf);
6348 buf.rebuild_with_pristine_saved_root(&updates);
6349
6350 assert_eq!(buf.line_count(), Some(expected_lf + 1));
6352
6353 assert!(Arc::ptr_eq(&buf.saved_root, &buf.piece_tree.root()));
6355 let diff = buf.diff_since_saved();
6356 assert!(diff.equal);
6357 assert!(buf.line_feeds_scanned);
6358 assert_eq!(buf.get_all_text().unwrap(), content);
6359 }
6360
6361 #[test]
6362 fn test_single_insertion() {
6363 let content = make_content(2 * 1024 * 1024);
6364 let mut buf = large_file_buffer(&content);
6365 let updates = scan_line_feeds(&mut buf);
6366
6367 let insert_offset = 1_000_000;
6369 let insert_text = b"INSERTED_TEXT\n";
6370 buf.insert_bytes(insert_offset, insert_text.to_vec());
6371
6372 buf.rebuild_with_pristine_saved_root(&updates);
6373
6374 let mut expected = content.clone();
6376 expected.splice(insert_offset..insert_offset, insert_text.iter().copied());
6377 assert_eq!(buf.get_all_text().unwrap(), expected);
6378
6379 let expected_lf = expected.iter().filter(|&&b| b == b'\n').count();
6381 assert_eq!(buf.line_count(), Some(expected_lf + 1));
6382
6383 let diff = buf.diff_since_saved();
6385 assert!(!diff.equal);
6386 assert!(!diff.byte_ranges.is_empty());
6387 }
6388
6389 #[test]
6394 fn test_diff_byte_ranges_are_document_absolute_after_eof_insert() {
6395 let content = make_content(4 * 1024 * 1024); let mut buf = large_file_buffer(&content);
6397 let updates = scan_line_feeds(&mut buf);
6398 buf.rebuild_with_pristine_saved_root(&updates);
6399
6400 let insert_offset = content.len() - 100;
6402 buf.insert_bytes(insert_offset, b"HELLO".to_vec());
6403
6404 let diff = buf.diff_since_saved();
6405 assert!(!diff.equal, "diff should detect the insertion");
6406 assert!(
6407 !diff.byte_ranges.is_empty(),
6408 "byte_ranges should not be empty"
6409 );
6410
6411 let first_range = &diff.byte_ranges[0];
6413 assert!(
6414 first_range.start >= content.len() - 200,
6415 "byte_ranges should be document-absolute (near EOF): got {:?}, expected near {}",
6416 first_range,
6417 insert_offset,
6418 );
6419 }
6420
6421 #[test]
6422 fn test_single_deletion() {
6423 let content = make_content(2 * 1024 * 1024);
6424 let mut buf = large_file_buffer(&content);
6425 let updates = scan_line_feeds(&mut buf);
6426
6427 let del_start = 500_000;
6429 let del_len = 1000;
6430 buf.delete_bytes(del_start, del_len);
6431
6432 buf.rebuild_with_pristine_saved_root(&updates);
6433
6434 let mut expected = content.clone();
6435 expected.drain(del_start..del_start + del_len);
6436 assert_eq!(buf.get_all_text().unwrap(), expected);
6437
6438 let diff = buf.diff_since_saved();
6439 assert!(!diff.equal);
6440 }
6441
6442 #[test]
6443 fn test_insert_and_delete() {
6444 let content = make_content(2 * 1024 * 1024);
6445 let mut buf = large_file_buffer(&content);
6446 let updates = scan_line_feeds(&mut buf);
6447
6448 let del_start = 100_000;
6450 let del_len = 500;
6451 buf.delete_bytes(del_start, del_len);
6452
6453 let insert_offset = 1_500_000; let insert_text = b"NEW_CONTENT\n";
6455 buf.insert_bytes(insert_offset, insert_text.to_vec());
6456
6457 buf.rebuild_with_pristine_saved_root(&updates);
6458
6459 let mut expected = content.clone();
6461 expected.drain(del_start..del_start + del_len);
6462 expected.splice(insert_offset..insert_offset, insert_text.iter().copied());
6463 assert_eq!(buf.get_all_text().unwrap(), expected);
6464
6465 let diff = buf.diff_since_saved();
6466 assert!(!diff.equal);
6467 }
6468
6469 #[test]
6470 fn test_multiple_scattered_edits() {
6471 let content = make_content(3 * 1024 * 1024);
6472 let mut buf = large_file_buffer(&content);
6473 let updates = scan_line_feeds(&mut buf);
6474 let mut expected = content.clone();
6475
6476 buf.delete_bytes(100_000, 200);
6479 expected.drain(100_000..100_200);
6480
6481 buf.insert_bytes(500_000, b"AAAA\n".to_vec());
6483 expected.splice(500_000..500_000, b"AAAA\n".iter().copied());
6484
6485 buf.delete_bytes(2_000_000, 300);
6487 expected.drain(2_000_000..2_000_300);
6488
6489 buf.insert_bytes(1_000_000, b"BBBB\n".to_vec());
6491 expected.splice(1_000_000..1_000_000, b"BBBB\n".iter().copied());
6492
6493 buf.rebuild_with_pristine_saved_root(&updates);
6494
6495 assert_eq!(buf.get_all_text().unwrap(), expected);
6496 let diff = buf.diff_since_saved();
6497 assert!(!diff.equal);
6498 }
6499
6500 #[test]
6501 fn test_content_preserved_after_rebuild() {
6502 let content = make_content(2 * 1024 * 1024);
6505 let mut buf = large_file_buffer(&content);
6506 let updates = scan_line_feeds(&mut buf);
6507
6508 buf.insert_bytes(0, b"HEADER\n".to_vec());
6509 buf.delete_bytes(1_000_000, 500);
6510
6511 let text_before = buf.get_all_text().unwrap();
6512 buf.rebuild_with_pristine_saved_root(&updates);
6513 let text_after = buf.get_all_text().unwrap();
6514
6515 assert_eq!(text_before, text_after);
6516 }
6517
        /// Builds a large-file-mode `TextBuffer` whose single `Stored` buffer
        /// is *not* resident: content stays on disk at `path` (via
        /// `new_unloaded`) and must be faulted in on demand. `saved_root`
        /// shares the initial tree root.
        fn large_file_buffer_unloaded(path: &std::path::Path, file_size: usize) -> TextBuffer {
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(crate::model::filesystem::StdFileSystem);
            let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
                0,
                path.to_path_buf(),
                0,
                file_size,
            );
            // One piece covering the whole file; line-feed count starts as
            // `None` so line metadata must come from a later scan.
            let piece_tree = if file_size > 0 {
                crate::model::piece_tree::PieceTree::new(
                    BufferLocation::Stored(0),
                    0,
                    file_size,
                    None,
                )
            } else {
                crate::model::piece_tree::PieceTree::empty()
            };
            let saved_root = piece_tree.root();
            TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: Some(path.to_path_buf()),
                modified: false,
                recovery_pending: false,
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(file_size),
                version: 0,
                config: BufferConfig::default(),
            }
        }
6561
6562 #[test]
6563 fn test_unloaded_buffer_no_edits_line_count() {
6564 let content = make_content(2 * 1024 * 1024);
6565 let expected_lf = content.iter().filter(|&&b| b == b'\n').count();
6566
6567 let tmp = tempfile::NamedTempFile::new().unwrap();
6568 std::fs::write(tmp.path(), &content).unwrap();
6569 let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6570
6571 assert!(
6572 buf.line_count().is_none(),
6573 "before scan, line_count should be None"
6574 );
6575
6576 let updates = scan_line_feeds(&mut buf);
6577 buf.rebuild_with_pristine_saved_root(&updates);
6578
6579 assert_eq!(
6580 buf.line_count(),
6581 Some(expected_lf + 1),
6582 "after rebuild, line_count must be exact"
6583 );
6584 assert!(buf.line_feeds_scanned);
6585 }
6586
6587 #[test]
6588 fn test_unloaded_buffer_with_edits_line_count() {
6589 let content = make_content(2 * 1024 * 1024);
6590
6591 let tmp = tempfile::NamedTempFile::new().unwrap();
6592 std::fs::write(tmp.path(), &content).unwrap();
6593 let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6594
6595 let updates = scan_line_feeds(&mut buf);
6596
6597 let insert_text = b"INSERTED\n";
6599 buf.insert_bytes(1_000_000, insert_text.to_vec());
6600
6601 buf.rebuild_with_pristine_saved_root(&updates);
6602
6603 let mut expected = content.clone();
6604 expected.splice(1_000_000..1_000_000, insert_text.iter().copied());
6605 let expected_lf = expected.iter().filter(|&&b| b == b'\n').count();
6606
6607 assert_eq!(
6608 buf.line_count(),
6609 Some(expected_lf + 1),
6610 "after rebuild with edits, line_count must be exact"
6611 );
6612 assert!(buf.line_feeds_scanned);
6613 }
6614
6615 #[test]
6620 fn test_diff_efficiency_after_rebuild() {
6621 let content = make_content(32 * 1024 * 1024);
6624 let mut buf = large_file_buffer(&content);
6625
6626 let updates = scan_line_feeds(&mut buf);
6627
6628 buf.insert_bytes(1_000_000, b"HELLO".to_vec());
6630
6631 buf.rebuild_with_pristine_saved_root(&updates);
6632
6633 let diff = buf.diff_since_saved();
6634 assert!(!diff.equal);
6635
6636 let total_leaves = buf.piece_tree.get_leaves().len();
6637 assert!(
6642 diff.nodes_visited < total_leaves,
6643 "diff visited {} nodes but tree has {} leaves — \
6644 Arc::ptr_eq short-circuiting is not working",
6645 diff.nodes_visited,
6646 total_leaves,
6647 );
6648 }
6649
        /// After a pristine-saved-root rebuild, loading a small viewport from
        /// an unloaded Stored buffer must only read roughly one chunk from
        /// disk — not the whole file. A tracking FileSystem records the
        /// largest `read_range` request to prove it.
        #[test]
        fn test_viewport_load_after_rebuild_does_not_load_entire_file() {
            use std::sync::atomic::{AtomicUsize, Ordering};

            // Wraps StdFileSystem and records the largest `read_range`
            // length requested; all other operations delegate unchanged.
            struct TrackingFs {
                inner: crate::model::filesystem::StdFileSystem,
                max_read_range_len: Arc<AtomicUsize>,
            }

            impl crate::model::filesystem::FileSystem for TrackingFs {
                fn read_file(&self, path: &Path) -> std::io::Result<Vec<u8>> {
                    self.inner.read_file(path)
                }
                // The only instrumented method: remember the biggest read.
                fn read_range(
                    &self,
                    path: &Path,
                    offset: u64,
                    len: usize,
                ) -> std::io::Result<Vec<u8>> {
                    self.max_read_range_len.fetch_max(len, Ordering::SeqCst);
                    self.inner.read_range(path, offset, len)
                }
                fn write_file(&self, path: &Path, data: &[u8]) -> std::io::Result<()> {
                    self.inner.write_file(path, data)
                }
                fn create_file(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
                {
                    self.inner.create_file(path)
                }
                fn open_file(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileReader>>
                {
                    self.inner.open_file(path)
                }
                fn open_file_for_write(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
                {
                    self.inner.open_file_for_write(path)
                }
                fn open_file_for_append(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Box<dyn crate::model::filesystem::FileWriter>>
                {
                    self.inner.open_file_for_append(path)
                }
                fn set_file_length(&self, path: &Path, len: u64) -> std::io::Result<()> {
                    self.inner.set_file_length(path, len)
                }
                fn rename(&self, from: &Path, to: &Path) -> std::io::Result<()> {
                    self.inner.rename(from, to)
                }
                fn copy(&self, from: &Path, to: &Path) -> std::io::Result<u64> {
                    self.inner.copy(from, to)
                }
                fn remove_file(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.remove_file(path)
                }
                fn remove_dir(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.remove_dir(path)
                }
                fn metadata(
                    &self,
                    path: &Path,
                ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
                    self.inner.metadata(path)
                }
                fn symlink_metadata(
                    &self,
                    path: &Path,
                ) -> std::io::Result<crate::model::filesystem::FileMetadata> {
                    self.inner.symlink_metadata(path)
                }
                fn is_dir(&self, path: &Path) -> std::io::Result<bool> {
                    self.inner.is_dir(path)
                }
                fn is_file(&self, path: &Path) -> std::io::Result<bool> {
                    self.inner.is_file(path)
                }
                fn set_permissions(
                    &self,
                    path: &Path,
                    permissions: &crate::model::filesystem::FilePermissions,
                ) -> std::io::Result<()> {
                    self.inner.set_permissions(path, permissions)
                }
                fn is_owner(&self, path: &Path) -> bool {
                    self.inner.is_owner(path)
                }
                fn read_dir(
                    &self,
                    path: &Path,
                ) -> std::io::Result<Vec<crate::model::filesystem::DirEntry>> {
                    self.inner.read_dir(path)
                }
                fn create_dir(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.create_dir(path)
                }
                fn create_dir_all(&self, path: &Path) -> std::io::Result<()> {
                    self.inner.create_dir_all(path)
                }
                fn canonicalize(&self, path: &Path) -> std::io::Result<PathBuf> {
                    self.inner.canonicalize(path)
                }
                fn current_uid(&self) -> u32 {
                    self.inner.current_uid()
                }
                fn sudo_write(
                    &self,
                    path: &Path,
                    data: &[u8],
                    mode: u32,
                    uid: u32,
                    gid: u32,
                ) -> std::io::Result<()> {
                    self.inner.sudo_write(path, data, mode, uid, gid)
                }
                fn search_file(
                    &self,
                    path: &Path,
                    pattern: &str,
                    opts: &crate::model::filesystem::FileSearchOptions,
                    cursor: &mut crate::model::filesystem::FileSearchCursor,
                ) -> std::io::Result<Vec<SearchMatch>> {
                    crate::model::filesystem::default_search_file(
                        &self.inner,
                        path,
                        pattern,
                        opts,
                        cursor,
                    )
                }
            }

            // Three chunks of backing data on disk.
            let file_size = LOAD_CHUNK_SIZE * 3;
            let content = make_content(file_size);

            let tmp = tempfile::NamedTempFile::new().unwrap();
            std::fs::write(tmp.path(), &content).unwrap();

            let max_read = Arc::new(AtomicUsize::new(0));
            let fs: Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> =
                Arc::new(TrackingFs {
                    inner: crate::model::filesystem::StdFileSystem,
                    max_read_range_len: max_read.clone(),
                });

            // Hand-assemble an unloaded large-file TextBuffer that uses the
            // tracking filesystem instead of the plain StdFileSystem.
            let buffer = crate::model::piece_tree::StringBuffer::new_unloaded(
                0,
                tmp.path().to_path_buf(),
                0,
                file_size,
            );
            let piece_tree = PieceTree::new(BufferLocation::Stored(0), 0, file_size, None);
            let saved_root = piece_tree.root();
            let mut buf = TextBuffer {
                fs,
                piece_tree,
                saved_root,
                buffers: vec![buffer],
                next_buffer_id: 1,
                file_path: Some(tmp.path().to_path_buf()),
                modified: false,
                recovery_pending: false,
                large_file: true,
                line_feeds_scanned: false,
                is_binary: false,
                line_ending: LineEnding::LF,
                original_line_ending: LineEnding::LF,
                encoding: Encoding::Utf8,
                original_encoding: Encoding::Utf8,
                saved_file_size: Some(file_size),
                version: 0,
                config: BufferConfig::default(),
            };

            // Fault in a viewport inside the second chunk, then rebuild.
            let viewport_offset = LOAD_CHUNK_SIZE + 100; buf.get_text_range_mut(viewport_offset, 4096).unwrap();

            let updates = scan_line_feeds(&mut buf);
            buf.rebuild_with_pristine_saved_root(&updates);

            // Reset the high-water mark; only post-rebuild reads count.
            max_read.store(0, Ordering::SeqCst);

            buf.get_text_range_mut(viewport_offset, 4096).unwrap();

            let largest_read = max_read.load(Ordering::SeqCst);
            assert!(
                largest_read <= LOAD_CHUNK_SIZE,
                "After rebuild, loading a viewport triggered a read of {} bytes \
                 (file_size={}). This means the entire Stored buffer is being \
                 loaded instead of just the needed chunk.",
                largest_read,
                file_size,
            );
        }
6866
6867 #[test]
6873 fn test_viewport_load_after_rebuild_preserves_line_counts() {
6874 let file_size = LOAD_CHUNK_SIZE * 3;
6875 let content = make_content(file_size);
6876
6877 let tmp = tempfile::NamedTempFile::new().unwrap();
6878 std::fs::write(tmp.path(), &content).unwrap();
6879 let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6880
6881 let updates = scan_line_feeds(&mut buf);
6883 buf.rebuild_with_pristine_saved_root(&updates);
6884
6885 let line_count_before = buf.piece_tree.line_count();
6886 assert!(
6887 line_count_before.is_some(),
6888 "line_count must be Some after rebuild"
6889 );
6890
6891 let mid_piece_offset = LOAD_CHUNK_SIZE + LOAD_CHUNK_SIZE / 2;
6894 buf.get_text_range_mut(mid_piece_offset, 4096).unwrap();
6895
6896 let line_count_after = buf.piece_tree.line_count();
6897 assert!(
6898 line_count_after.is_some(),
6899 "line_count must still be Some after viewport load \
6900 (was {:?} before, now {:?})",
6901 line_count_before,
6902 line_count_after,
6903 );
6904 assert_eq!(
6905 line_count_before, line_count_after,
6906 "line_count must not change after viewport load"
6907 );
6908 }
6909
6910 #[test]
6912 fn test_diff_efficiency_after_rebuild_unloaded() {
6913 let content = make_content(32 * 1024 * 1024);
6914
6915 let tmp = tempfile::NamedTempFile::new().unwrap();
6916 std::fs::write(tmp.path(), &content).unwrap();
6917 let mut buf = large_file_buffer_unloaded(tmp.path(), content.len());
6918
6919 let updates = scan_line_feeds(&mut buf);
6920
6921 buf.insert_bytes(1_000_000, b"HELLO".to_vec());
6922
6923 buf.rebuild_with_pristine_saved_root(&updates);
6924
6925 let diff = buf.diff_since_saved();
6926 assert!(!diff.equal);
6927
6928 let total_leaves = buf.piece_tree.get_leaves().len();
6929 assert!(
6930 diff.nodes_visited < total_leaves,
6931 "diff visited {} nodes but tree has {} leaves — \
6932 Arc::ptr_eq short-circuiting is not working (unloaded path)",
6933 diff.nodes_visited,
6934 total_leaves,
6935 );
6936 }
6937 }
6938
6939 mod chunked_search {
6940 use super::*;
6941
6942 fn make_buffer(content: &[u8]) -> TextBuffer {
6943 TextBuffer::from_bytes(content.to_vec(), test_fs())
6944 }
6945
        /// Convenience: compile `pattern` as a byte-oriented regex, panicking
        /// on invalid syntax (tests only use known-good patterns).
        fn make_regex(pattern: &str) -> regex::bytes::Regex {
            regex::bytes::Regex::new(pattern).unwrap()
        }
6949
6950 #[test]
6951 fn single_chunk_line_col_context() {
6952 let mut buf = make_buffer(b"hello world\nfoo bar\nbaz quux\n");
6953 let state = buf.search_scan_all(make_regex("bar"), 100, 3).unwrap();
6954 assert_eq!(state.matches.len(), 1);
6955 let m = &state.matches[0];
6956 assert_eq!(m.line, 2);
6957 assert_eq!(m.column, 5); assert_eq!(m.context, "foo bar");
6959 assert_eq!(m.byte_offset, 16); assert_eq!(m.length, 3);
6961 }
6962
6963 #[test]
6964 fn multiple_matches_correct_lines() {
6965 let mut buf = make_buffer(b"aaa\nbbb\nccc\naaa\n");
6966 let state = buf.search_scan_all(make_regex("aaa"), 100, 3).unwrap();
6967 assert_eq!(state.matches.len(), 2);
6968 assert_eq!(state.matches[0].line, 1);
6969 assert_eq!(state.matches[0].context, "aaa");
6970 assert_eq!(state.matches[1].line, 4);
6971 assert_eq!(state.matches[1].context, "aaa");
6972 }
6973
6974 #[test]
6975 fn match_on_last_line_no_trailing_newline() {
6976 let mut buf = make_buffer(b"line1\nline2\ntarget");
6977 let state = buf.search_scan_all(make_regex("target"), 100, 6).unwrap();
6978 assert_eq!(state.matches.len(), 1);
6979 let m = &state.matches[0];
6980 assert_eq!(m.line, 3);
6981 assert_eq!(m.column, 1);
6982 assert_eq!(m.context, "target");
6983 }
6984
6985 #[test]
6986 fn match_at_first_byte() {
6987 let mut buf = make_buffer(b"target\nother\n");
6988 let state = buf.search_scan_all(make_regex("target"), 100, 6).unwrap();
6989 assert_eq!(state.matches.len(), 1);
6990 let m = &state.matches[0];
6991 assert_eq!(m.line, 1);
6992 assert_eq!(m.column, 1);
6993 assert_eq!(m.byte_offset, 0);
6994 }
6995
6996 #[test]
6997 fn max_matches_caps() {
6998 let mut buf = make_buffer(b"a\na\na\na\na\n");
6999 let state = buf.search_scan_all(make_regex("a"), 3, 1).unwrap();
7000 assert_eq!(state.matches.len(), 3);
7001 assert!(state.capped);
7002 }
7003
7004 #[test]
7005 fn case_insensitive_regex() {
7006 let mut buf = make_buffer(b"Hello\nhello\nHELLO\n");
7007 let state = buf
7008 .search_scan_all(make_regex("(?i)hello"), 100, 5)
7009 .unwrap();
7010 assert_eq!(state.matches.len(), 3);
7011 assert_eq!(state.matches[0].line, 1);
7012 assert_eq!(state.matches[1].line, 2);
7013 assert_eq!(state.matches[2].line, 3);
7014 }
7015
7016 #[test]
7017 fn whole_word_boundary() {
7018 let mut buf = make_buffer(b"foobar\nfoo bar\nfoo\n");
7019 let state = buf.search_scan_all(make_regex(r"\bfoo\b"), 100, 3).unwrap();
7020 assert_eq!(state.matches.len(), 2);
7021 assert_eq!(state.matches[0].line, 2);
7022 assert_eq!(state.matches[0].column, 1);
7023 assert_eq!(state.matches[1].line, 3);
7024 }
7025
7026 #[test]
7030 fn multi_chunk_line_numbers_correct() {
7031 let mut content = Vec::new();
7033 for i in 1..=100 {
7034 content.extend_from_slice(format!("line_{:03}\n", i).as_bytes());
7035 }
7036
7037 let temp_dir = tempfile::TempDir::new().unwrap();
7040 let path = temp_dir.path().join("test.txt");
7041 std::fs::write(&path, &content).unwrap();
7042 let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7043
7044 let state = buffer
7045 .search_scan_all(make_regex("line_050"), 100, 8)
7046 .unwrap();
7047 assert_eq!(state.matches.len(), 1);
7048 let m = &state.matches[0];
7049 assert_eq!(m.line, 50);
7050 assert_eq!(m.column, 1);
7051 assert_eq!(m.context, "line_050");
7052 }
7053
7054 #[test]
7057 fn multi_chunk_no_duplicate_matches() {
7058 let mut content = Vec::new();
7059 for i in 1..=100 {
7060 content.extend_from_slice(format!("word_{:03}\n", i).as_bytes());
7061 }
7062
7063 let temp_dir = tempfile::TempDir::new().unwrap();
7064 let path = temp_dir.path().join("test.txt");
7065 std::fs::write(&path, &content).unwrap();
7066 let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7067
7068 let state = buffer.search_scan_all(make_regex("word_"), 200, 5).unwrap();
7070 assert_eq!(
7071 state.matches.len(),
7072 100,
7073 "Should find exactly 100 matches (one per line), no duplicates"
7074 );
7075
7076 for (i, m) in state.matches.iter().enumerate() {
7078 assert_eq!(
7079 m.line,
7080 i + 1,
7081 "Match {} should be on line {}, got {}",
7082 i,
7083 i + 1,
7084 m.line
7085 );
7086 }
7087 }
7088
7089 #[test]
7093 fn overlap_mid_line_line_numbers() {
7094 let mut content = Vec::new();
7097 content.extend_from_slice(b"short\n");
7098 content.extend_from_slice(b"AAAA_");
7100 for _ in 0..50 {
7101 content.extend_from_slice(b"BBBBBBBBBB"); }
7103 content.extend_from_slice(b"_TARGET_HERE\n");
7104 content.extend_from_slice(b"after\n");
7105
7106 let temp_dir = tempfile::TempDir::new().unwrap();
7107 let path = temp_dir.path().join("test.txt");
7108 std::fs::write(&path, &content).unwrap();
7109 let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7110
7111 let state = buffer
7112 .search_scan_all(make_regex("TARGET_HERE"), 100, 11)
7113 .unwrap();
7114 assert_eq!(state.matches.len(), 1);
7115 let m = &state.matches[0];
7116 assert_eq!(m.line, 2, "TARGET_HERE is on line 2 (the long line)");
7117 assert_eq!(m.length, 11);
7118
7119 let state2 = buffer.search_scan_all(make_regex("after"), 100, 5).unwrap();
7121 assert_eq!(state2.matches.len(), 1);
7122 assert_eq!(state2.matches[0].line, 3);
7123 }
7124
7125 #[test]
7128 fn match_spanning_chunk_boundary() {
7129 let mut content = Vec::new();
7131 content.extend_from_slice(b"line1\n");
7132 for _ in 0..60 {
7134 content.extend_from_slice(b"XXXXXXXXXX"); }
7136 content.extend_from_slice(b"SPLIT\n");
7137 content.extend_from_slice(b"end\n");
7138
7139 let temp_dir = tempfile::TempDir::new().unwrap();
7140 let path = temp_dir.path().join("test.txt");
7141 std::fs::write(&path, &content).unwrap();
7142 let mut buffer = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();
7143
7144 let state = buffer.search_scan_all(make_regex("SPLIT"), 100, 5).unwrap();
7145 assert_eq!(state.matches.len(), 1, "SPLIT should be found exactly once");
7146 assert_eq!(state.matches[0].line, 2); }
7148
7149 #[test]
7150 fn empty_buffer_no_matches() {
7151 let mut buf = make_buffer(b"");
7152 let state = buf.search_scan_all(make_regex("anything"), 100, 8).unwrap();
7153 assert!(state.matches.is_empty());
7154 assert!(!state.capped);
7155 }
7156
7157 #[test]
7158 fn single_line_no_newline() {
7159 let mut buf = make_buffer(b"hello world");
7160 let state = buf.search_scan_all(make_regex("world"), 100, 5).unwrap();
7161 assert_eq!(state.matches.len(), 1);
7162 let m = &state.matches[0];
7163 assert_eq!(m.line, 1);
7164 assert_eq!(m.column, 7);
7165 assert_eq!(m.context, "hello world");
7166 }
7167
7168 #[test]
7171 fn multiple_matches_same_line() {
7172 let mut buf = make_buffer(b"aa bb aa cc aa\nother\n");
7173 let state = buf.search_scan_all(make_regex("aa"), 100, 2).unwrap();
7174 assert_eq!(state.matches.len(), 3);
7175 for m in &state.matches {
7176 assert_eq!(m.line, 1);
7177 assert_eq!(m.context, "aa bb aa cc aa");
7178 }
7179 assert_eq!(state.matches[0].column, 1);
7180 assert_eq!(state.matches[1].column, 7);
7181 assert_eq!(state.matches[2].column, 13);
7182 }
7183 }
7184
    mod hybrid_search {
        //! Tests for `search_hybrid`, which mixes filesystem-backed search of
        //! unloaded regions with in-memory scanning of loaded/dirty regions.

        use super::*;

        // Compiles a byte-oriented regex; panics on an invalid pattern.
        fn make_regex(pattern: &str) -> regex::bytes::Regex {
            regex::bytes::Regex::new(pattern).unwrap()
        }

        // Default search options: plain case-sensitive regex, up to 100 hits.
        fn make_opts() -> crate::model::filesystem::FileSearchOptions {
            crate::model::filesystem::FileSearchOptions {
                fixed_string: false,
                case_sensitive: true,
                whole_word: false,
                max_matches: 100,
            }
        }

        // On a fully loaded buffer, hybrid search must agree field-for-field
        // with the pure in-memory scan.
        #[test]
        fn hybrid_matches_scan_all_for_loaded_buffer() {
            let content = b"foo bar baz\nfoo again\nlast line\n";
            let mut buf = TextBuffer::from_bytes(content.to_vec(), test_fs());
            let regex = make_regex("foo");
            let opts = make_opts();

            let hybrid = buf
                .search_hybrid("foo", &opts, regex.clone(), 100, 3)
                .unwrap();
            let scan = buf.search_scan_all(regex, 100, 3).unwrap();

            assert_eq!(hybrid.len(), scan.matches.len());
            for (h, s) in hybrid.iter().zip(scan.matches.iter()) {
                assert_eq!(h.byte_offset, s.byte_offset);
                assert_eq!(h.line, s.line);
                assert_eq!(h.column, s.column);
                assert_eq!(h.length, s.length);
                assert_eq!(h.context, s.context);
            }
        }

        // A small load-chunk size leaves most of the file unloaded; hybrid
        // search must still find a match deep in the unloaded region.
        #[test]
        fn hybrid_finds_matches_in_unloaded_regions() {
            let temp_dir = tempfile::TempDir::new().unwrap();
            let path = temp_dir.path().join("big.txt");

            let mut content = Vec::new();
            for i in 0..100 {
                content.extend_from_slice(format!("line {:03}\n", i).as_bytes());
            }
            std::fs::write(&path, &content).unwrap();

            let mut buf = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();

            // Detect whether any stored piece is genuinely unloaded, so the
            // assertion below documents which path was actually exercised.
            let leaves = buf.piece_tree.get_leaves();
            let has_unloaded = leaves.iter().any(|l| {
                matches!(l.location, BufferLocation::Stored(_))
                    && buf
                        .buffers
                        .get(l.location.buffer_id())
                        .map(|b| !b.is_loaded())
                        .unwrap_or(false)
            });

            let regex = make_regex("line 050");
            let opts = make_opts();
            let matches = buf.search_hybrid("line 050", &opts, regex, 100, 8).unwrap();

            // "line 050" is the 51st line (i starts at 0).
            assert_eq!(matches.len(), 1); assert!(matches[0].context.contains("line 050"));
            if has_unloaded {
                // Match was produced without loading the region into memory.
            }
        }

        // An insert at offset 0 dirties the buffer; hybrid search must merge
        // the in-memory edit with the on-disk content and find every match.
        #[test]
        fn hybrid_dirty_buffer_finds_all_matches() {
            let temp_dir = tempfile::TempDir::new().unwrap();
            let path = temp_dir.path().join("dirty.txt");

            let mut content = Vec::new();
            for i in 0..50 {
                content.extend_from_slice(format!("target {:02}\n", i).as_bytes());
            }
            std::fs::write(&path, &content).unwrap();

            let mut buf = TextBuffer::load_from_file(&path, 10, test_fs()).unwrap();

            buf.insert(0, "target XX\n");

            let regex = make_regex("target");
            let opts = make_opts();
            let matches = buf.search_hybrid("target", &opts, regex, 200, 6).unwrap();

            // 50 on-disk matches + 1 inserted match, in document order.
            assert_eq!(matches.len(), 51);
            assert!(matches[0].context.contains("target XX"));
        }

        // A pattern covering the entire (small) file still matches once.
        #[test]
        fn hybrid_boundary_match() {
            let temp_dir = tempfile::TempDir::new().unwrap();
            let path = temp_dir.path().join("boundary.txt");

            let content = b"AAAAABBBBB";
            std::fs::write(&path, content).unwrap();

            let mut buf = TextBuffer::from_bytes(content.to_vec(), test_fs());
            buf.rename_file_path(path);

            let regex = make_regex("AAAAABBBBB");
            let opts = make_opts();
            let matches = buf
                .search_hybrid("AAAAABBBBB", &opts, regex, 100, 10)
                .unwrap();

            assert_eq!(matches.len(), 1);
            assert_eq!(matches[0].byte_offset, 0);
        }

        // Both the options cap and the argument cap limit the result count.
        #[test]
        fn hybrid_max_matches_respected() {
            let content = b"aaa\naaa\naaa\naaa\naaa\n";
            let mut buf = TextBuffer::from_bytes(content.to_vec(), test_fs());
            let regex = make_regex("aaa");
            let opts = crate::model::filesystem::FileSearchOptions {
                max_matches: 3,
                ..make_opts()
            };
            let matches = buf.search_hybrid("aaa", &opts, regex, 3, 3).unwrap();
            assert!(matches.len() <= 3);
        }
    }
7331
    mod boundary_overlap {
        //! Tests for `search_boundary_overlap`, which re-scans a small window
        //! around a chunk boundary (previous chunk's tail + next chunk's head)
        //! and must report ONLY matches that actually cross the boundary —
        //! matches fully inside either side are found by the per-chunk scans.

        use super::*;

        // Compiles a byte-oriented regex; panics on an invalid pattern.
        fn make_regex(pattern: &str) -> regex::bytes::Regex {
            regex::bytes::Regex::new(pattern).unwrap()
        }

        // No tail from the previous chunk means no boundary to straddle.
        #[test]
        fn empty_prev_tail_returns_nothing() {
            let matches = search_boundary_overlap(b"", b"hello", 0, 1, &make_regex("hello"), 100);
            assert!(matches.is_empty());
        }

        // A match entirely within the tail is the previous chunk's job.
        #[test]
        fn pure_tail_match_skipped() {
            let matches =
                search_boundary_overlap(b"foo bar", b" baz", 0, 1, &make_regex("foo"), 100);
            assert!(matches.is_empty());
        }

        // "SPLIT" straddles the boundary ("xxSPL" | "ITyy") and is reported
        // with an offset relative to the start of the tail.
        #[test]
        fn cross_boundary_match_found() {
            let matches =
                search_boundary_overlap(b"xxSPL", b"ITyy", 0, 1, &make_regex("SPLIT"), 100);
            assert_eq!(matches.len(), 1);
            assert_eq!(matches[0].byte_offset, 2);
            assert_eq!(matches[0].length, 5);
        }

        // A match entirely within the head is the next chunk's job.
        #[test]
        fn pure_head_match_skipped() {
            let matches = search_boundary_overlap(b"foo", b" baz", 0, 1, &make_regex("baz"), 100);
            assert!(matches.is_empty());
        }

        // The caller-supplied starting line (here 5) is advanced past the
        // newline inside the tail, so the match is still attributed to line 5.
        #[test]
        fn line_number_tracking() {
            let matches =
                search_boundary_overlap(b"line1\nSPL", b"IT end", 0, 5, &make_regex("SPLIT"), 100);
            assert_eq!(matches.len(), 1);
            assert_eq!(matches[0].line, 5);
        }

        // The max_matches argument caps the result count.
        #[test]
        fn max_matches_respected() {
            let matches = search_boundary_overlap(b"aXb", b"Xc", 0, 1, &make_regex("X"), 1);
            assert!(matches.len() <= 1);
        }
    }
7389}
7390
#[cfg(test)]
mod property_tests {
    //! Property-based tests (proptest) for `TextBuffer` invariants, plus
    //! unit tests for write-recipe application and binary detection.

    use crate::model::filesystem::StdFileSystem;
    use std::sync::Arc;

    // Real filesystem implementation; tests only touch tempfiles.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }
    use super::*;
    use proptest::prelude::*;

    // Strategy: 0..100 bytes drawn from lowercase ASCII letters and '\n',
    // so generated text always has a meaningful line structure.
    fn text_with_newlines() -> impl Strategy<Value = Vec<u8>> {
        prop::collection::vec(
            prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n'),],
            0..100,
        )
    }

    // A randomized edit applied to the buffer under test. Offsets/sizes are
    // generated unclamped and clamped inside each property as needed.
    #[derive(Debug, Clone)]
    enum Operation {
        Insert { offset: usize, text: Vec<u8> },
        Delete { offset: usize, bytes: usize },
    }

    // Strategy: a sequence of 0..50 random inserts/deletes.
    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
        prop::collection::vec(
            prop_oneof![
                (0usize..200, text_with_newlines())
                    .prop_map(|(offset, text)| { Operation::Insert { offset, text } }),
                (0usize..200, 1usize..50)
                    .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
            ],
            0..50,
        )
    }

    proptest! {
        // line_count == newline count + 1 for any freshly loaded text.
        #[test]
        fn prop_line_count_consistent(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let newline_count = text.iter().filter(|&&b| b == b'\n').count();
            prop_assert_eq!(buffer.line_count(), Some(newline_count + 1));
        }

        // Round-trip: the buffer reproduces its input bytes exactly.
        #[test]
        fn prop_get_all_text_matches_original(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // Inserting N bytes grows total_bytes by exactly N.
        #[test]
        fn prop_insert_increases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let initial_bytes = buffer.total_bytes();

            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());

            prop_assert_eq!(buffer.total_bytes(), initial_bytes + insert_text.len());
        }

        // Deleting N bytes shrinks total_bytes by exactly N.
        #[test]
        fn prop_delete_decreases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            delete_bytes in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let initial_bytes = buffer.total_bytes();

            let offset = offset.min(buffer.total_bytes());
            let delete_bytes = delete_bytes.min(buffer.total_bytes() - offset);

            if delete_bytes == 0 {
                return Ok(());
            }

            buffer.delete_bytes(offset, delete_bytes);

            prop_assert_eq!(buffer.total_bytes(), initial_bytes - delete_bytes);
        }

        // Insert followed by deleting the same span restores the original.
        #[test]
        fn prop_insert_then_delete_restores_original(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());
            buffer.delete_bytes(offset, insert_text.len());

            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // offset -> Position -> offset is the identity for every valid offset.
        #[test]
        fn prop_offset_position_roundtrip(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            for offset in 0..text.len() {
                let pos = buffer.offset_to_position(offset).expect("offset_to_position should succeed for valid offset");
                let back = buffer.position_to_offset(pos);
                prop_assert_eq!(back, offset, "Failed roundtrip for offset {}", offset);
            }
        }

        // get_text_range returns exactly the requested slice of the content.
        #[test]
        fn prop_get_text_range_valid(
            text in text_with_newlines(),
            offset in 0usize..100,
            length in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let offset = offset.min(buffer.total_bytes());
            let length = length.min(buffer.total_bytes() - offset);

            if length == 0 {
                return Ok(());
            }

            let result = buffer.get_text_range(offset, length);
            prop_assert_eq!(result, Some(text[offset..offset + length].to_vec()));
        }

        // Random edit sequences: buffer content always matches a plain Vec<u8>
        // model that mirrors the same (clamped) operations.
        #[test]
        fn prop_operations_maintain_consistency(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
            let mut expected_text = b"initial\ntext".to_vec();

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text.clone());

                        // Mirror the insert in the reference model.
                        let offset = offset.min(expected_text.len());
                        expected_text.splice(offset..offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let bytes = bytes.min(buffer.total_bytes() - offset);
                            buffer.delete_bytes(offset, bytes);

                            // Mirror the delete in the reference model.
                            if offset < expected_text.len() {
                                let bytes = bytes.min(expected_text.len() - offset);
                                expected_text.drain(offset..offset + bytes);
                            }
                        }
                    }
                }
            }

            prop_assert_eq!(buffer.get_all_text().unwrap(), expected_text);
        }

        // line_count never reports zero lines, even for an emptied buffer.
        #[test]
        fn prop_line_count_never_zero(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                prop_assert!(buffer.line_count().unwrap_or(1) >= 1);
            }
        }

        // Sanity bound: a usize underflow in size tracking would wrap to a
        // huge value; inputs here can never legitimately reach 10 MB.
        #[test]
        fn prop_total_bytes_never_negative(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                prop_assert!(buffer.total_bytes() < 10_000_000);
            }
        }

        // After every edit, the midpoint offset still maps to a position that
        // maps back inside the buffer — piece tree and line index stay synced.
        #[test]
        fn prop_piece_tree_and_line_index_stay_synced(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                if buffer.total_bytes() > 0 {
                    let mid_offset = buffer.total_bytes() / 2;
                    if let Some(pos) = buffer.offset_to_position(mid_offset) {
                        let back = buffer.position_to_offset(pos);

                        prop_assert!(back <= buffer.total_bytes());
                    }
                }
            }
        }

        // Replaying a write recipe reproduces a pristine buffer's content.
        #[test]
        fn prop_write_recipe_matches_content(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            let output = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(output, text, "Recipe output should match original content");
        }

        // Replaying a write recipe reproduces the content after random edits.
        #[test]
        fn prop_write_recipe_after_edits(
            initial_text in text_with_newlines(),
            operations in operation_strategy()
        ) {
            let mut buffer = TextBuffer::from_bytes(initial_text, test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let bytes = bytes.min(buffer.total_bytes() - offset);
                            if bytes > 0 {
                                buffer.delete_bytes(offset, bytes);
                            }
                        }
                    }
                }
            }

            let expected = buffer.get_all_text().unwrap();
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
            let output = apply_recipe(&buffer, &recipe);

            prop_assert_eq!(output, expected, "Recipe output should match buffer content after edits");
        }

        // File-backed buffer + one insert: recipes mixing Copy (from the
        // source file) and Insert actions must reproduce the buffer content.
        #[test]
        fn prop_write_recipe_copy_ops_valid(
            text in prop::collection::vec(prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n')], 10..200),
            edit_offset in 0usize..100,
            edit_text in text_with_newlines()
        ) {
            use tempfile::TempDir;

            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");
            std::fs::write(&file_path, &text).unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 1024 * 1024, test_fs()).unwrap();

            let edit_offset = edit_offset.min(buffer.total_bytes());
            buffer.insert_bytes(edit_offset, edit_text.clone());

            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            let expected = buffer.get_all_text().unwrap();
            let output = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(output, expected, "Recipe with Copy ops should match buffer content");

            // Informational only: large files with a mid-file edit are likely
            // to contain Copy actions, but this is not guaranteed.
            if text.len() > 100 && edit_offset > 10 {
                let has_copy = recipe.actions.iter().any(|a| matches!(a, RecipeAction::Copy { .. }));
                let _ = has_copy;
            }
        }
    }

    // Materializes a WriteRecipe into the byte stream it would produce:
    // Copy actions read from the recipe's source file, Insert actions pull
    // from the recipe's stored insert data.
    fn apply_recipe(buffer: &TextBuffer, recipe: &WriteRecipe) -> Vec<u8> {
        let mut output = Vec::new();
        for action in &recipe.actions {
            match action {
                RecipeAction::Copy { offset, len } => {
                    if let Some(src_path) = &recipe.src_path {
                        let data = buffer
                            .fs
                            .read_range(src_path, *offset, *len as usize)
                            .expect("read_range should succeed for Copy op");
                        output.extend_from_slice(&data);
                    } else {
                        panic!("Copy action without source path");
                    }
                }
                RecipeAction::Insert { index } => {
                    output.extend_from_slice(&recipe.insert_data[*index]);
                }
            }
        }
        output
    }

    // Returns the "is binary" flag from encoding detection.
    fn is_detected_as_binary(bytes: &[u8]) -> bool {
        TextBuffer::detect_encoding_or_binary(bytes, false).1
    }

    // Ordinary text — including tabs, CRLF, empty input, and ANSI escape
    // sequences — must not be classified as binary.
    #[test]
    fn test_detect_binary_text_files() {
        assert!(!is_detected_as_binary(b"Hello, world!"));
        assert!(!is_detected_as_binary(b"Line 1\nLine 2\nLine 3"));
        assert!(!is_detected_as_binary(b"Tabs\tand\tnewlines\n"));
        assert!(!is_detected_as_binary(b"Carriage return\r\n"));

        assert!(!is_detected_as_binary(b""));

        assert!(!is_detected_as_binary(b"\x1b[31mRed text\x1b[0m"));
    }

    // NUL bytes, low control characters, and DEL mark content as binary.
    #[test]
    fn test_detect_binary_binary_files() {
        assert!(is_detected_as_binary(b"Hello\x00World"));
        assert!(is_detected_as_binary(b"\x00"));

        assert!(is_detected_as_binary(b"Text with \x01 control char"));
        assert!(is_detected_as_binary(b"\x02\x03\x04"));

        assert!(is_detected_as_binary(b"Text with DEL\x7F"));
    }

    // PNG magic bytes (with and without the IHDR chunk header) are binary.
    #[test]
    fn test_detect_binary_png_file() {
        let png_header: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        assert!(is_detected_as_binary(png_header));

        let mut png_data = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        png_data.extend_from_slice(b"\x00\x00\x00\x0DIHDR"); assert!(is_detected_as_binary(&png_data));
    }

    // JPEG, GIF, and BMP headers are binary.
    #[test]
    fn test_detect_binary_other_image_formats() {
        let jpeg_header: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
        assert!(is_detected_as_binary(jpeg_header));

        let gif_data: &[u8] = &[
            0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, ];
        assert!(is_detected_as_binary(gif_data));

        let bmp_header: &[u8] = &[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00];
        assert!(is_detected_as_binary(bmp_header));
    }

    // ELF, Mach-O, and PE executable headers are binary.
    #[test]
    fn test_detect_binary_executable_formats() {
        let elf_header: &[u8] = &[0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00];
        assert!(is_detected_as_binary(elf_header));

        let macho_header: &[u8] = &[0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
        assert!(is_detected_as_binary(macho_header));

        let pe_header: &[u8] = &[0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00];
        assert!(is_detected_as_binary(pe_header));
    }
}
7824
/// One materialized line yielded by [`TextBufferLineIterator`].
#[derive(Debug, Clone)]
pub struct LineData {
    /// Byte offset of the first byte of this line within the buffer.
    pub byte_offset: usize,
    /// Line text (lossy UTF-8) with the trailing `'\n'`, if any, stripped.
    pub content: String,
    /// Whether this line was terminated by a `'\n'` in the buffer (false for
    /// the final unterminated line, and for long lines truncated mid-scan).
    pub has_newline: bool,
    /// Line number when the buffer has line metadata, `None` otherwise.
    /// NOTE(review): value comes from `Position::line` — confirm whether
    /// numbering is 0- or 1-based against that type's definition.
    pub line_number: Option<usize>,
}
7837
/// Iterator over up to a fixed number of lines starting at a byte position.
/// All lines are materialized eagerly at construction time.
pub struct TextBufferLineIterator {
    /// Lines collected at construction.
    lines: Vec<LineData>,
    /// Index of the next line to yield.
    current_index: usize,
    /// True when buffer content remains beyond the last collected line.
    pub has_more: bool,
}
7848
7849impl TextBufferLineIterator {
7850 pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
7851 let buffer_len = buffer.len();
7852 if byte_pos >= buffer_len {
7853 return Ok(Self {
7854 lines: Vec::new(),
7855 current_index: 0,
7856 has_more: false,
7857 });
7858 }
7859
7860 let has_line_metadata = buffer.line_count().is_some();
7862
7863 let mut current_line = if has_line_metadata {
7866 buffer.offset_to_position(byte_pos).map(|pos| pos.line)
7867 } else {
7868 None
7869 };
7870
7871 let mut lines = Vec::with_capacity(max_lines);
7872 let mut current_offset = byte_pos;
7873 let estimated_line_length = 80; for _ in 0..max_lines {
7877 if current_offset >= buffer_len {
7878 break;
7879 }
7880
7881 let line_start = current_offset;
7882 let line_number = current_line;
7883
7884 let estimated_max_line_length = estimated_line_length * 3;
7886 let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);
7887
7888 let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;
7890
7891 let mut line_len = 0;
7893 let mut found_newline = false;
7894 for &byte in chunk.iter() {
7895 line_len += 1;
7896 if byte == b'\n' {
7897 found_newline = true;
7898 break;
7899 }
7900 }
7901
7902 if !found_newline && current_offset + line_len < buffer_len {
7904 let remaining = buffer_len - current_offset - line_len;
7906 let additional_bytes = estimated_max_line_length.min(remaining);
7907 let more_chunk =
7908 buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;
7909
7910 let mut extended_chunk = chunk;
7911 extended_chunk.extend_from_slice(&more_chunk);
7912
7913 for &byte in more_chunk.iter() {
7914 line_len += 1;
7915 if byte == b'\n' {
7916 found_newline = true;
7917 break;
7918 }
7919 }
7920
7921 let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
7922 let has_newline = line_string.ends_with('\n');
7923 let content = if has_newline {
7924 line_string[..line_string.len() - 1].to_string()
7925 } else {
7926 line_string
7927 };
7928
7929 lines.push(LineData {
7930 byte_offset: line_start,
7931 content,
7932 has_newline,
7933 line_number,
7934 });
7935
7936 current_offset += line_len;
7937 if has_line_metadata && found_newline {
7938 current_line = current_line.map(|n| n + 1);
7939 }
7940 continue;
7941 }
7942
7943 let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
7945 let has_newline = line_string.ends_with('\n');
7946 let content = if has_newline {
7947 line_string[..line_string.len() - 1].to_string()
7948 } else {
7949 line_string
7950 };
7951
7952 lines.push(LineData {
7953 byte_offset: line_start,
7954 content,
7955 has_newline,
7956 line_number,
7957 });
7958
7959 current_offset += line_len;
7960 if has_line_metadata && found_newline {
7962 current_line = current_line.map(|n| n + 1);
7963 }
7964 }
7965
7966 let has_more = current_offset < buffer_len;
7968
7969 Ok(Self {
7970 lines,
7971 current_index: 0,
7972 has_more,
7973 })
7974 }
7975}
7976
7977impl Iterator for TextBufferLineIterator {
7978 type Item = LineData;
7979
7980 fn next(&mut self) -> Option<Self::Item> {
7981 if self.current_index < self.lines.len() {
7982 let line = self.lines[self.current_index].clone();
7983 self.current_index += 1;
7984 Some(line)
7985 } else {
7986 None
7987 }
7988 }
7989}