1use crate::model::filesystem::{FileMetadata, FileSystem, WriteOp};
4use crate::model::piece_tree::{
5 BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, Position,
6 StringBuffer, TreeStats,
7};
8use crate::model::piece_tree_diff::PieceTreeDiff;
9use crate::primitives::grapheme;
10use anyhow::{Context, Result};
11use regex::bytes::Regex;
12use std::io::{self, Write};
13use std::ops::Range;
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16
/// Error raised when a save could not complete due to missing write
/// permission on the destination. The buffer contents have already been
/// staged into `temp_path`; an elevated helper can finish the move.
#[derive(Debug, Clone, PartialEq)]
pub struct SudoSaveRequired {
    /// Temporary file already containing the full buffer contents.
    pub temp_path: PathBuf,
    /// Destination the user asked to save to.
    pub dest_path: PathBuf,
    /// Owner uid to restore on the destination (0 when unknown).
    pub uid: u32,
    /// Owner gid to restore on the destination (0 when unknown).
    pub gid: u32,
    /// Permission bits to restore on the destination (0 when unknown).
    pub mode: u32,
}

impl std::fmt::Display for SudoSaveRequired {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!(
            "Permission denied saving to {}. Use sudo to complete the operation.",
            self.dest_path.display()
        ))
    }
}

impl std::error::Error for SudoSaveRequired {}
46
/// File size (bytes) at which loading switches to the lazy large-file
/// path (100 MiB).
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// Maximum number of bytes loaded from disk in one on-demand chunk (1 MiB).
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// Alignment (64 KiB) applied to chunk start offsets when lazily loading
/// ranges of a large file.
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
/// Newline convention used when rendering and saving buffer text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// Unix-style `\n` (the default).
    #[default]
    LF,
    /// Windows-style `\r\n`.
    CRLF,
    /// Classic-Mac-style `\r`.
    CR,
}

impl LineEnding {
    /// The literal byte sequence written for this line ending.
    pub fn as_str(&self) -> &'static str {
        match *self {
            LineEnding::CRLF => "\r\n",
            LineEnding::CR => "\r",
            LineEnding::LF => "\n",
        }
    }

    /// Short human-readable label for UI display.
    pub fn display_name(&self) -> &'static str {
        match *self {
            LineEnding::CRLF => "CRLF",
            LineEnding::CR => "CR",
            LineEnding::LF => "LF",
        }
    }
}
88
89struct WriteRecipe {
91 src_path: Option<PathBuf>,
93 insert_data: Vec<Vec<u8>>,
95 actions: Vec<RecipeAction>,
97}
98
99#[derive(Debug, Clone, Copy)]
101enum RecipeAction {
102 Copy { offset: u64, len: u64 },
104 Insert { index: usize },
106}
107
108impl WriteRecipe {
109 fn to_write_ops(&self) -> Vec<WriteOp<'_>> {
111 self.actions
112 .iter()
113 .map(|action| match action {
114 RecipeAction::Copy { offset, len } => WriteOp::Copy {
115 offset: *offset,
116 len: *len,
117 },
118 RecipeAction::Insert { index } => WriteOp::Insert {
119 data: &self.insert_data[*index],
120 },
121 })
122 .collect()
123 }
124
125 fn has_copy_ops(&self) -> bool {
127 self.actions
128 .iter()
129 .any(|a| matches!(a, RecipeAction::Copy { .. }))
130 }
131
132 fn flatten_inserts(&self) -> Vec<u8> {
135 let mut result = Vec::new();
136 for action in &self.actions {
137 if let RecipeAction::Insert { index } = action {
138 result.extend_from_slice(&self.insert_data[*index]);
139 }
140 }
141 result
142 }
143}
144
/// A zero-based line number, either exact or measured relative to a
/// cached anchor line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// An exact line number.
    Absolute(usize),
    /// A line number derived from a cached anchor.
    Relative {
        line: usize,
        from_cached_line: usize,
    },
}

impl LineNumber {
    /// The underlying zero-based line value, regardless of variant.
    pub fn value(&self) -> usize {
        match *self {
            LineNumber::Absolute(line) => line,
            LineNumber::Relative { line, .. } => line,
        }
    }

    /// True for the `Absolute` variant.
    pub fn is_absolute(&self) -> bool {
        !self.is_relative()
    }

    /// True for the `Relative` variant.
    pub fn is_relative(&self) -> bool {
        matches!(self, LineNumber::Relative { .. })
    }

    /// One-based display form; relative numbers carry a `~` prefix.
    pub fn format(&self) -> String {
        let one_based = self.value() + 1;
        if self.is_relative() {
            format!("~{}", one_based)
        } else {
            format!("{}", one_based)
        }
    }
}
184
/// An editable text document backed by a piece tree over one or more
/// backing buffers, with lazy on-demand loading for large files.
pub struct TextBuffer {
    /// Filesystem abstraction all disk I/O goes through (local or remote).
    fs: Arc<dyn FileSystem + Send + Sync>,

    /// Current document structure: pieces referencing ranges of `buffers`.
    piece_tree: PieceTree,

    /// Piece-tree root captured at the last save; `diff_since_saved`
    /// compares against it to detect modifications.
    saved_root: Arc<crate::model::piece_tree::PieceTreeNode>,

    /// Backing storage. Index 0 holds the original file content (kept
    /// `Unloaded` for large files); later entries hold inserted text.
    buffers: Vec<StringBuffer>,

    /// Next id to assign when a new backing buffer is created.
    next_buffer_id: usize,

    /// Path the buffer was loaded from / last saved to, if any.
    file_path: Option<PathBuf>,

    /// True when content differs from the last saved snapshot.
    modified: bool,

    /// Set on every content change; presumably consumed by a crash
    /// recovery subsystem (NOTE(review): consumer not visible in this file).
    recovery_pending: bool,

    /// True when the file was opened via the lazy large-file path.
    large_file: bool,

    /// True when the initial content sample looked like binary data.
    is_binary: bool,

    /// Line ending that will be used when writing the file.
    line_ending: LineEnding,

    /// Line ending detected at load time; a mismatch with `line_ending`
    /// forces conversion during save.
    original_line_ending: LineEnding,

    /// On-disk size recorded at load/save time, if known.
    saved_file_size: Option<usize>,

    /// Monotonic (wrapping) counter bumped on every content change.
    version: u64,
}
240
241impl TextBuffer {
242 pub fn new(_large_file_threshold: usize, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
245 let piece_tree = PieceTree::empty();
246 let line_ending = LineEnding::default();
247 TextBuffer {
248 fs,
249 saved_root: piece_tree.root(),
250 piece_tree,
251 buffers: vec![StringBuffer::new(0, Vec::new())],
252 next_buffer_id: 1,
253 file_path: None,
254 modified: false,
255 recovery_pending: false,
256 large_file: false,
257 is_binary: false,
258 line_ending,
259 original_line_ending: line_ending,
260 saved_file_size: None,
261 version: 0,
262 }
263 }
264
    /// Monotonic content-change counter (wraps on overflow).
    pub fn version(&self) -> u64 {
        self.version
    }

    /// The filesystem abstraction this buffer performs all I/O through.
    pub fn filesystem(&self) -> &Arc<dyn FileSystem + Send + Sync> {
        &self.fs
    }

    /// Replaces the filesystem abstraction used for subsequent I/O.
    pub fn set_filesystem(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
        self.fs = fs;
    }

    /// Bumps the change counter; wrapping add avoids overflow panics.
    #[inline]
    fn bump_version(&mut self) {
        self.version = self.version.wrapping_add(1);
    }

    /// Records a content change: marks the buffer dirty, flags it for
    /// recovery, and bumps the version counter.
    #[inline]
    fn mark_content_modified(&mut self) {
        self.modified = true;
        self.recovery_pending = true;
        self.bump_version();
    }
291
292 pub fn from_bytes(content: Vec<u8>, fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
294 let bytes = content.len();
295
296 let line_ending = Self::detect_line_ending(&content);
298
299 let buffer = StringBuffer::new(0, content);
301 let line_feed_cnt = buffer.line_feed_count();
302
303 let piece_tree = if bytes > 0 {
304 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
305 } else {
306 PieceTree::empty()
307 };
308
309 let saved_root = piece_tree.root();
310
311 TextBuffer {
312 fs,
313 line_ending,
314 original_line_ending: line_ending,
315 piece_tree,
316 saved_root,
317 buffers: vec![buffer],
318 next_buffer_id: 1,
319 file_path: None,
320 modified: false,
321 recovery_pending: false,
322 large_file: false,
323 is_binary: false,
324 saved_file_size: Some(bytes), version: 0,
326 }
327 }
328
    /// Convenience wrapper over [`Self::from_bytes`] for UTF-8 input.
    /// The threshold parameter is accepted for call-site compatibility
    /// but unused: the content is already in memory.
    pub fn from_str(
        s: &str,
        _large_file_threshold: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> Self {
        Self::from_bytes(s.as_bytes().to_vec(), fs)
    }
337
338 pub fn empty(fs: Arc<dyn FileSystem + Send + Sync>) -> Self {
340 let piece_tree = PieceTree::empty();
341 let saved_root = piece_tree.root();
342 let line_ending = LineEnding::default();
343 TextBuffer {
344 fs,
345 piece_tree,
346 saved_root,
347 buffers: vec![StringBuffer::new(0, Vec::new())],
348 next_buffer_id: 1,
349 file_path: None,
350 modified: false,
351 recovery_pending: false,
352 large_file: false,
353 is_binary: false,
354 line_ending,
355 original_line_ending: line_ending,
356 saved_file_size: None,
357 version: 0,
358 }
359 }
360
361 pub fn load_from_file<P: AsRef<Path>>(
363 path: P,
364 large_file_threshold: usize,
365 fs: Arc<dyn FileSystem + Send + Sync>,
366 ) -> anyhow::Result<Self> {
367 let path = path.as_ref();
368
369 let metadata = fs.metadata(path)?;
371 let file_size = metadata.size as usize;
372
373 let threshold = if large_file_threshold > 0 {
375 large_file_threshold
376 } else {
377 DEFAULT_LARGE_FILE_THRESHOLD
378 };
379
380 if file_size >= threshold {
382 Self::load_large_file(path, file_size, fs)
383 } else {
384 Self::load_small_file(path, fs)
385 }
386 }
387
    /// Loads a file eagerly into memory and tags the buffer with its path.
    fn load_small_file(path: &Path, fs: Arc<dyn FileSystem + Send + Sync>) -> anyhow::Result<Self> {
        let contents = fs.read_file(path)?;

        let is_binary = Self::detect_binary(&contents);

        // Detected here, before `contents` moves into `from_bytes`;
        // `from_bytes` computes the same value internally and it is
        // reassigned below.
        let line_ending = Self::detect_line_ending(&contents);

        let mut buffer = Self::from_bytes(contents, fs);
        buffer.file_path = Some(path.to_path_buf());
        buffer.modified = false;
        buffer.large_file = false;
        buffer.is_binary = is_binary;
        buffer.line_ending = line_ending;
        buffer.original_line_ending = line_ending;
        Ok(buffer)
    }
408
    /// Opens a file lazily: only an 8 KiB prefix is read to sniff binary
    /// content and line endings; the body stays on disk behind a single
    /// `BufferData::Unloaded` buffer until ranges are demanded.
    fn load_large_file(
        path: &Path,
        file_size: usize,
        fs: Arc<dyn FileSystem + Send + Sync>,
    ) -> anyhow::Result<Self> {
        use crate::model::piece_tree::{BufferData, BufferLocation};

        // Sample the head of the file for content sniffing only.
        let sample_size = file_size.min(8 * 1024);
        let sample = fs.read_range(path, 0, sample_size)?;
        let is_binary = Self::detect_binary(&sample);
        let line_ending = Self::detect_line_ending(&sample);

        // Buffer 0 references the on-disk bytes without loading them.
        let buffer = StringBuffer {
            id: 0,
            data: BufferData::Unloaded {
                file_path: path.to_path_buf(),
                file_offset: 0,
                bytes: file_size,
            },
        };

        // Line-feed count is unknown (`None`) until chunks are loaded.
        let piece_tree = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        let saved_root = piece_tree.root();

        tracing::debug!(
            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
            file_size,
            file_size
        );

        Ok(TextBuffer {
            fs,
            piece_tree,
            saved_root,
            buffers: vec![buffer],
            next_buffer_id: 1,
            file_path: Some(path.to_path_buf()),
            modified: false,
            recovery_pending: false,
            large_file: true,
            is_binary,
            line_ending,
            original_line_ending: line_ending,
            saved_file_size: Some(file_size),
            version: 0,
        })
    }
466
467 pub fn save(&mut self) -> anyhow::Result<()> {
469 if let Some(path) = &self.file_path {
470 self.save_to_file(path.clone())
471 } else {
472 anyhow::bail!(io::Error::new(
473 io::ErrorKind::NotFound,
474 "No file path associated with buffer",
475 ))
476 }
477 }
478
    /// Whether saving should rewrite the destination file in place.
    /// Returns true when this process is NOT the file's owner —
    /// NOTE(review): presumably because a replace-by-rename save would
    /// change the file's ownership; confirm intent with the fs layer.
    fn should_use_inplace_write(&self, dest_path: &Path) -> bool {
        !self.fs.is_owner(dest_path)
    }
488
    /// Plans how to materialize the current document on disk.
    ///
    /// Walks every piece in document order and emits either a `Copy`
    /// action (piece still backed by the unmodified original file, no
    /// read needed) or an `Insert` action carrying the bytes to write.
    /// When the target line ending differs from the one detected at load
    /// time, copying is disabled and all bytes are read and converted.
    fn build_write_recipe(&self) -> io::Result<WriteRecipe> {
        let total = self.total_bytes();

        let needs_conversion = self.line_ending != self.original_line_ending;
        // Copying from the original file is only valid when no conversion
        // is needed and the source file still exists on disk.
        let src_path_for_copy: Option<&Path> = if needs_conversion {
            None
        } else {
            self.file_path.as_deref().filter(|p| self.fs.exists(p))
        };
        let target_ending = self.line_ending;

        let mut insert_data: Vec<Vec<u8>> = Vec::new();
        let mut actions: Vec<RecipeAction> = Vec::new();

        for piece_view in self.piece_tree.iter_pieces_in_range(0, total) {
            let buffer_id = piece_view.location.buffer_id();
            let buffer = self.buffers.get(buffer_id).ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("Buffer {} not found", buffer_id),
                )
            })?;

            match &buffer.data {
                BufferData::Unloaded {
                    file_path,
                    file_offset,
                    ..
                } => {
                    // A still-on-disk piece of the original file can be
                    // copied byte-for-byte, but only from the save source.
                    let can_copy = matches!(piece_view.location, BufferLocation::Stored(_))
                        && src_path_for_copy.is_some_and(|src| file_path == src);

                    if can_copy {
                        let src_offset = (*file_offset + piece_view.buffer_offset) as u64;
                        actions.push(RecipeAction::Copy {
                            offset: src_offset,
                            len: piece_view.bytes as u64,
                        });
                        continue;
                    }

                    // Otherwise the bytes must be read now so they can be
                    // written (and possibly converted) as an insert.
                    let data = self.fs.read_range(
                        file_path,
                        (*file_offset + piece_view.buffer_offset) as u64,
                        piece_view.bytes,
                    )?;

                    let data = if needs_conversion {
                        Self::convert_line_endings_to(&data, target_ending)
                    } else {
                        data
                    };

                    let index = insert_data.len();
                    insert_data.push(data);
                    actions.push(RecipeAction::Insert { index });
                }

                BufferData::Loaded { data, .. } => {
                    // In-memory piece: slice it out of its backing buffer.
                    let start = piece_view.buffer_offset;
                    let end = start + piece_view.bytes;
                    let chunk = &data[start..end];

                    let chunk = if needs_conversion {
                        Self::convert_line_endings_to(chunk, target_ending)
                    } else {
                        chunk.to_vec()
                    };

                    let index = insert_data.len();
                    insert_data.push(chunk);
                    actions.push(RecipeAction::Insert { index });
                }
            }
        }

        Ok(WriteRecipe {
            src_path: src_path_for_copy.map(|p| p.to_path_buf()),
            insert_data,
            actions,
        })
    }
593
    /// Creates a temp file for a staged save: first next to the
    /// destination, and if that directory is not writable, at a fallback
    /// unique location chosen by the filesystem layer.
    fn create_temp_file(
        &self,
        dest_path: &Path,
    ) -> io::Result<(PathBuf, Box<dyn crate::model::filesystem::FileWriter>)> {
        let same_dir_temp = self.fs.temp_path_for(dest_path);
        match self.fs.create_file(&same_dir_temp) {
            Ok(file) => Ok((same_dir_temp, file)),
            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                // Destination directory is read-only; fall back.
                let temp_path = self.fs.unique_temp_path(dest_path);
                let file = self.fs.create_file(&temp_path)?;
                Ok((temp_path, file))
            }
            Err(e) => Err(e),
        }
    }
616
    /// Saves the buffer to `path`, choosing among several strategies:
    /// empty-file write, in-place rewrite (local, non-owner), whole-file
    /// write, or patched write reusing unchanged on-disk ranges.
    ///
    /// # Errors
    /// Propagates filesystem errors; on a local `PermissionDenied` the
    /// content is staged to a temp file and a [`SudoSaveRequired`] error
    /// (via `anyhow`) is returned.
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Empty document: just create/truncate the destination.
        if total == 0 {
            self.fs.write_file(dest_path, &[])?;
            self.finalize_save(dest_path)?;
            return Ok(());
        }

        let recipe = self.build_write_recipe()?;
        let ops = recipe.to_write_ops();

        let is_local = self.fs.remote_connection_info().is_none();
        let use_inplace = is_local && self.should_use_inplace_write(dest_path);

        if use_inplace {
            self.save_with_inplace_write(dest_path, &recipe)?;
        } else if !recipe.has_copy_ops() && !is_local {
            // Remote, fully materialized content: one whole-file write.
            let data = recipe.flatten_inserts();
            self.fs.write_file(dest_path, &data)?;
        } else if is_local {
            let write_result = if !recipe.has_copy_ops() {
                let data = recipe.flatten_inserts();
                self.fs.write_file(dest_path, &data)
            } else {
                // Patch against the original file to avoid re-reading
                // unchanged ranges.
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
                self.fs.write_patched(src_for_patch, dest_path, &ops)
            };

            if let Err(e) = write_result {
                if e.kind() == io::ErrorKind::PermissionDenied {
                    // Stage the full content into a temp file and hand the
                    // caller a SudoSaveRequired describing the final move.
                    let original_metadata = self.fs.metadata_if_exists(dest_path);
                    let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
                    self.write_recipe_to_file(&mut temp_file, &recipe)?;
                    temp_file.sync_all()?;
                    drop(temp_file);
                    return Err(self.make_sudo_error(temp_path, dest_path, original_metadata));
                }
                return Err(e.into());
            }
        } else {
            // Remote with copy ops: let the filesystem layer patch.
            let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);
            self.fs.write_patched(src_for_patch, dest_path, &ops)?;
        }

        self.finalize_save(dest_path)?;
        Ok(())
    }
689
    /// Rewrites the destination file in place (no temp-and-rename),
    /// falling back on `PermissionDenied` first to a patched write and
    /// then to a sudo-staged temp file.
    fn save_with_inplace_write(
        &self,
        dest_path: &Path,
        recipe: &WriteRecipe,
    ) -> anyhow::Result<()> {
        // Captured up front so the sudo error can carry the original
        // ownership/permissions.
        let original_metadata = self.fs.metadata_if_exists(dest_path);

        match self.fs.open_file_for_write(dest_path) {
            Ok(mut out_file) => {
                self.write_recipe_to_file(&mut out_file, recipe)?;
                out_file.sync_all()?;
                Ok(())
            }
            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                // First fallback: a patched write via the fs layer.
                let ops = recipe.to_write_ops();
                let src_for_patch = recipe.src_path.as_deref().unwrap_or(dest_path);

                match self.fs.write_patched(src_for_patch, dest_path, &ops) {
                    Ok(()) => Ok(()),
                    Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                        // Final fallback: stage to a temp file and ask the
                        // caller to finish with elevated privileges.
                        let (temp_path, mut temp_file) = self.create_temp_file(dest_path)?;
                        self.write_recipe_to_file(&mut temp_file, recipe)?;
                        temp_file.sync_all()?;
                        drop(temp_file);
                        Err(self.make_sudo_error(temp_path, dest_path, original_metadata))
                    }
                    Err(e) => Err(e.into()),
                }
            }
            Err(e) => Err(e.into()),
        }
    }
730
731 fn write_recipe_to_file(
733 &self,
734 out_file: &mut Box<dyn crate::model::filesystem::FileWriter>,
735 recipe: &WriteRecipe,
736 ) -> io::Result<()> {
737 for action in &recipe.actions {
738 match action {
739 RecipeAction::Copy { offset, len } => {
740 let src_path = recipe.src_path.as_ref().ok_or_else(|| {
742 io::Error::new(io::ErrorKind::InvalidData, "Copy action without source")
743 })?;
744 let data = self.fs.read_range(src_path, *offset, *len as usize)?;
745 out_file.write_all(&data)?;
746 }
747 RecipeAction::Insert { index } => {
748 out_file.write_all(&recipe.insert_data[*index])?;
749 }
750 }
751 }
752 Ok(())
753 }
754
    /// Post-save bookkeeping: records the new on-disk size and path,
    /// re-consolidates lazy large-file storage, snapshots the tree as
    /// "saved", and locks in the line ending that was just written.
    fn finalize_save(&mut self, dest_path: &Path) -> anyhow::Result<()> {
        let new_size = self.fs.metadata(dest_path)?.size as usize;
        tracing::debug!(
            "Buffer::save: updating saved_file_size from {:?} to {}",
            self.saved_file_size,
            new_size
        );
        self.saved_file_size = Some(new_size);
        self.file_path = Some(dest_path.to_path_buf());

        if self.large_file {
            self.consolidate_large_file(dest_path, new_size);
        }

        self.mark_saved_snapshot();
        self.original_line_ending = self.line_ending;
        Ok(())
    }

    /// Same bookkeeping as [`Self::finalize_save`] for saves performed
    /// outside this buffer (e.g. after a `SudoSaveRequired` was resolved
    /// by a privileged helper).
    pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
        let new_size = self.fs.metadata(&dest_path)?.size as usize;
        self.saved_file_size = Some(new_size);
        self.file_path = Some(dest_path.clone());

        if self.large_file {
            self.consolidate_large_file(&dest_path, new_size);
        }

        self.mark_saved_snapshot();
        self.original_line_ending = self.line_ending;
        Ok(())
    }
794
795 fn consolidate_large_file(&mut self, path: &Path, file_size: usize) {
798 let buffer = StringBuffer {
799 id: 0,
800 data: BufferData::Unloaded {
801 file_path: path.to_path_buf(),
802 file_offset: 0,
803 bytes: file_size,
804 },
805 };
806
807 self.piece_tree = if file_size > 0 {
808 PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
809 } else {
810 PieceTree::empty()
811 };
812
813 self.buffers = vec![buffer];
814 self.next_buffer_id = 1;
815
816 tracing::debug!(
817 "Buffer::consolidate_large_file: consolidated into single piece of {} bytes",
818 file_size
819 );
820 }
821
    /// Builds the `SudoSaveRequired` error returned when a save was
    /// staged to `temp_path` but finishing it needs elevated privileges.
    /// On Unix the destination's original uid/gid/mode are captured so
    /// they can be restored (each defaults to 0 when unknown).
    fn make_sudo_error(
        &self,
        temp_path: PathBuf,
        dest_path: &Path,
        original_metadata: Option<FileMetadata>,
    ) -> anyhow::Error {
        #[cfg(unix)]
        let (uid, gid, mode) = if let Some(ref meta) = original_metadata {
            (
                meta.uid.unwrap_or(0),
                meta.gid.unwrap_or(0),
                meta.permissions
                    .as_ref()
                    .map(|p| p.mode() & 0o7777)
                    .unwrap_or(0),
            )
        } else {
            (0, 0, 0)
        };
        #[cfg(not(unix))]
        let (uid, gid, mode) = (0u32, 0u32, 0u32);

        // Suppresses the unused-variable warning on non-Unix targets.
        let _ = original_metadata;
        anyhow::anyhow!(SudoSaveRequired {
            temp_path,
            dest_path: dest_path.to_path_buf(),
            uid,
            gid,
            mode,
        })
    }
855
    /// Total document length in bytes.
    pub fn total_bytes(&self) -> usize {
        self.piece_tree.total_bytes()
    }

    /// Number of lines, or `None` when unknown (e.g. unloaded large-file
    /// content whose line feeds have not been counted).
    pub fn line_count(&self) -> Option<usize> {
        self.piece_tree.line_count()
    }

    /// Records the current tree as the saved baseline and clears the
    /// modified flag (does not touch `recovery_pending`).
    pub fn mark_saved_snapshot(&mut self) {
        self.saved_root = self.piece_tree.root();
        self.modified = false;
    }
873
    /// Computes what changed since the last saved snapshot.
    ///
    /// Fast path: an identical root `Arc` means no change. Otherwise a
    /// structural tree diff runs; because structurally different trees
    /// can encode identical bytes (e.g. an undone edit), small candidate
    /// ranges (≤ 64 KiB total) are verified byte-for-byte before being
    /// reported as changes.
    pub fn diff_since_saved(&self) -> PieceTreeDiff {
        // Same root node ⇒ trees are identical; report "no changes".
        if Arc::ptr_eq(&self.saved_root, &self.piece_tree.root()) {
            return PieceTreeDiff {
                equal: true,
                byte_ranges: Vec::new(),
                line_ranges: Some(Vec::new()),
            };
        }

        let structure_diff = self.diff_trees_by_structure();

        if structure_diff.equal {
            return structure_diff;
        }

        // Total size of all candidate changed ranges.
        let total_changed_bytes: usize = structure_diff
            .byte_ranges
            .iter()
            .map(|r| r.end.saturating_sub(r.start))
            .sum();

        // Cap on how much content we are willing to re-read to rule out
        // structural false positives.
        const MAX_VERIFY_BYTES: usize = 64 * 1024;
        if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
            if self.verify_content_differs_in_ranges(&structure_diff.byte_ranges) {
                return structure_diff;
            } else {
                // Bytes are actually identical: structurally different
                // trees encode the same content.
                return PieceTreeDiff {
                    equal: true,
                    byte_ranges: Vec::new(),
                    line_ranges: Some(Vec::new()),
                };
            }
        }

        structure_diff
    }
936
937 fn verify_content_differs_in_ranges(&self, byte_ranges: &[std::ops::Range<usize>]) -> bool {
940 let saved_bytes = self.tree_total_bytes(&self.saved_root);
941 let current_bytes = self.piece_tree.total_bytes();
942
943 if saved_bytes != current_bytes {
945 return true;
946 }
947
948 for range in byte_ranges {
950 if range.start >= range.end {
951 continue;
952 }
953
954 let saved_slice =
956 self.extract_range_from_tree(&self.saved_root, range.start, range.end);
957 let current_slice = self.get_text_range(range.start, range.end);
959
960 match (saved_slice, current_slice) {
961 (Some(saved), Some(current)) => {
962 if saved != current {
963 return true; }
965 }
966 _ => {
967 return true;
969 }
970 }
971 }
972
973 false
975 }
976
    /// Reads document bytes `start..end` out of an arbitrary tree
    /// snapshot (used to compare against the saved tree). Returns `None`
    /// when any needed buffer data is unavailable.
    fn extract_range_from_tree(
        &self,
        root: &Arc<crate::model::piece_tree::PieceTreeNode>,
        start: usize,
        end: usize,
    ) -> Option<Vec<u8>> {
        let mut result = Vec::with_capacity(end.saturating_sub(start));
        self.collect_range_from_node(root, start, end, 0, &mut result)?;
        Some(result)
    }
988
    /// Recursive worker for [`Self::extract_range_from_tree`]: appends
    /// the bytes of `range_start..range_end` that fall under `node`
    /// (whose subtree begins at document offset `node_offset`) onto
    /// `result`. Returns `None` when a leaf's backing data is missing.
    fn collect_range_from_node(
        &self,
        node: &Arc<crate::model::piece_tree::PieceTreeNode>,
        range_start: usize,
        range_end: usize,
        node_offset: usize,
        result: &mut Vec<u8>,
    ) -> Option<()> {
        use crate::model::piece_tree::PieceTreeNode;

        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Document offset where the right subtree begins.
                let left_end = node_offset + left_bytes;

                if range_start < left_end {
                    self.collect_range_from_node(
                        left,
                        range_start,
                        range_end,
                        node_offset,
                        result,
                    )?;
                }

                if range_end > left_end {
                    self.collect_range_from_node(right, range_start, range_end, left_end, result)?;
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                ..
            } => {
                let node_end = node_offset + bytes;

                // Only touch leaves that overlap the requested range.
                if range_start < node_end && range_end > node_offset {
                    let buf = self.buffers.get(location.buffer_id())?;
                    let data = buf.get_data()?;

                    // Clamp the request to this leaf's extent.
                    let leaf_start = range_start.saturating_sub(node_offset);
                    let leaf_end = (range_end - node_offset).min(*bytes);

                    if leaf_start < leaf_end {
                        let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
                        result.extend_from_slice(slice);
                    }
                }
            }
        }
        Some(())
    }
1051
1052 fn tree_total_bytes(&self, root: &Arc<crate::model::piece_tree::PieceTreeNode>) -> usize {
1054 use crate::model::piece_tree::PieceTreeNode;
1055 match root.as_ref() {
1056 PieceTreeNode::Internal {
1057 left_bytes, right, ..
1058 } => left_bytes + self.tree_total_bytes(right),
1059 PieceTreeNode::Leaf { bytes, .. } => *bytes,
1060 }
1061 }
1062
    /// Structural diff between the saved and current trees. The closure
    /// counts line feeds in a leaf sub-range so the diff can report line
    /// ranges; it yields `None` when buffer data is unavailable.
    fn diff_trees_by_structure(&self) -> PieceTreeDiff {
        crate::model::piece_tree_diff::diff_piece_trees(
            &self.saved_root,
            &self.piece_tree.root(),
            &|leaf, start, len| {
                if len == 0 {
                    return Some(0);
                }
                let buf = self.buffers.get(leaf.location.buffer_id())?;
                let data = buf.get_data()?;
                let start = leaf.offset + start;
                let end = start + len;
                let slice = data.get(start..end)?;
                let line_feeds = slice.iter().filter(|&&b| b == b'\n').count();
                Some(line_feeds)
            },
        )
    }
1082
    /// Converts a byte offset into a (line, column) position; `None`
    /// when the piece tree cannot resolve it.
    pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
        self.piece_tree
            .offset_to_position(offset, &self.buffers)
            .map(|(line, column)| Position { line, column })
    }

    /// Converts a (line, column) position into a byte offset.
    pub fn position_to_offset(&self, position: Position) -> usize {
        self.piece_tree
            .position_to_offset(position.line, position.column, &self.buffers)
    }
1095
1096 pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
1098 if text.is_empty() {
1099 return self.piece_tree.cursor_at_offset(offset);
1100 }
1101
1102 self.mark_content_modified();
1104
1105 let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
1107
1108 let (buffer_location, buffer_offset, text_len) =
1110 if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
1111 append_info
1112 } else {
1113 let buffer_id = self.next_buffer_id;
1115 self.next_buffer_id += 1;
1116 let buffer = StringBuffer::new(buffer_id, text.clone());
1117 self.buffers.push(buffer);
1118 (BufferLocation::Added(buffer_id), 0, text.len())
1119 };
1120
1121 self.piece_tree.insert(
1123 offset,
1124 buffer_location,
1125 buffer_offset,
1126 text_len,
1127 line_feed_cnt,
1128 &self.buffers,
1129 )
1130 }
1131
    /// Checks whether inserting at `offset` lands exactly at the end of
    /// the preceding piece AND that piece ends at the end of its
    /// edit-owned backing buffer. If so, appends `text` to that buffer
    /// and returns `(location, append_offset, len)` so the caller can
    /// extend the piece instead of fragmenting storage.
    fn try_append_to_existing_buffer(
        &mut self,
        offset: usize,
        text: &[u8],
    ) -> Option<(BufferLocation, usize, usize)> {
        if text.is_empty() || offset == 0 {
            return None;
        }

        // Look at the piece containing the byte just before the insertion.
        let piece_info = self.piece_tree.find_by_offset(offset - 1)?;

        // The insertion must land exactly at the end of that piece.
        let offset_in_piece = piece_info.offset_in_piece?;
        if offset_in_piece + 1 != piece_info.bytes {
            return None;
        }

        // Only `Added` (edit) buffers grow; the stored buffer never does.
        if !matches!(piece_info.location, BufferLocation::Added(_)) {
            return None;
        }

        let buffer_id = piece_info.location.buffer_id();
        let buffer = self.buffers.get_mut(buffer_id)?;

        let buffer_len = buffer.get_data()?.len();

        // The piece must also end at the buffer's end, otherwise other
        // pieces already reference the following bytes.
        if piece_info.offset + piece_info.bytes != buffer_len {
            return None;
        }

        let append_offset = buffer.append(text);

        Some((piece_info.location, append_offset, text.len()))
    }
1177
    /// Inserts UTF-8 text at a byte offset (see [`Self::insert_bytes`]).
    pub fn insert(&mut self, offset: usize, text: &str) {
        self.insert_bytes(offset, text.as_bytes().to_vec());
    }
1182
1183 pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
1186 if text.is_empty() {
1187 let offset = self.position_to_offset(position);
1188 return self.piece_tree.cursor_at_offset(offset);
1189 }
1190
1191 self.mark_content_modified();
1192
1193 let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1195
1196 let buffer_id = self.next_buffer_id;
1198 self.next_buffer_id += 1;
1199 let buffer = StringBuffer::new(buffer_id, text.clone());
1200 self.buffers.push(buffer);
1201
1202 self.piece_tree.insert_at_position(
1204 position.line,
1205 position.column,
1206 BufferLocation::Added(buffer_id),
1207 0,
1208 text.len(),
1209 line_feed_cnt,
1210 &self.buffers,
1211 )
1212 }
1213
    /// Deletes `bytes` bytes starting at `offset`. A no-op for
    /// zero-length deletes or offsets at/past the end of the document.
    pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
        if bytes == 0 || offset >= self.total_bytes() {
            return;
        }

        self.piece_tree.delete(offset, bytes, &self.buffers);

        self.mark_content_modified();
    }
1225
1226 pub fn delete(&mut self, range: Range<usize>) {
1228 if range.end > range.start {
1229 self.delete_bytes(range.start, range.end - range.start);
1230 }
1231 }
1232
    /// Deletes the text between two (line, column) positions. Note the
    /// buffer is marked modified unconditionally, even for empty ranges.
    pub fn delete_range(&mut self, start: Position, end: Position) {
        self.piece_tree.delete_position_range(
            start.line,
            start.column,
            end.line,
            end.column,
            &self.buffers,
        );
        self.mark_content_modified();
    }
1246
    /// Replaces the entire document content with `new_content`.
    ///
    /// A fresh backing buffer is pushed (even for empty content) and the
    /// piece tree is rebuilt from scratch. Old buffers are retained,
    /// which keeps previously captured tree snapshots resolvable.
    pub fn replace_content(&mut self, new_content: &str) {
        let bytes = new_content.len();
        let content_bytes = new_content.as_bytes().to_vec();

        let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();

        let buffer_id = self.next_buffer_id;
        self.next_buffer_id += 1;
        let buffer = StringBuffer::new(buffer_id, content_bytes);
        self.buffers.push(buffer);

        if bytes > 0 {
            self.piece_tree = PieceTree::new(
                BufferLocation::Added(buffer_id),
                0,
                bytes,
                Some(line_feed_cnt),
            );
        } else {
            self.piece_tree = PieceTree::empty();
        }

        self.mark_content_modified();
    }
1280
    /// Replaces the current piece tree with a previously captured
    /// snapshot (undo/redo). NOTE(review): a snapshot taken before a
    /// large-file consolidation would reference replaced buffers —
    /// confirm callers never restore across that boundary.
    pub fn restore_piece_tree(&mut self, tree: &Arc<PieceTree>) {
        self.piece_tree = (**tree).clone();
        self.mark_content_modified();
    }

    /// Captures the current piece tree as an immutable snapshot.
    pub fn snapshot_piece_tree(&self) -> Arc<PieceTree> {
        Arc::new(self.piece_tree.clone())
    }
1293
    /// Applies many `(start, end, replacement)` edits in one pass over
    /// the tree and returns the net byte-length delta.
    ///
    /// Buffers for all non-empty replacement texts are allocated up
    /// front, then handed back through the tree's callback via `idx`.
    /// NOTE(review): this assumes the tree invokes the callback exactly
    /// once per non-empty text, in edit order — confirm in
    /// `PieceTree::apply_bulk_edits`.
    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
        // (location, buffer_offset, byte_len, line_feed_count) per
        // non-empty replacement, in edit order.
        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();

        for (_, _, text) in edits {
            if !text.is_empty() {
                let buffer_id = self.next_buffer_id;
                self.next_buffer_id += 1;
                let content = text.as_bytes().to_vec();
                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
                let bytes = content.len();
                let buffer = StringBuffer::new(buffer_id, content);
                self.buffers.push(buffer);
                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
            }
        }

        // Replay the pre-allocated buffer info in creation order.
        let mut idx = 0;
        let delta = self
            .piece_tree
            .apply_bulk_edits(edits, &self.buffers, |_text| {
                let info = buffer_info[idx];
                idx += 1;
                info
            });

        self.mark_content_modified();
        delta
    }
1330
    /// Reads `bytes` bytes starting at document offset `offset` without
    /// triggering any loading. Returns `None` if a needed buffer's data
    /// is unavailable (unloaded large-file content); see
    /// [`Self::get_text_range_mut`] for the loading variant.
    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
        if bytes == 0 {
            return Some(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = offset + bytes;
        let mut collected = 0;

        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
            let buffer_id = piece_view.location.buffer_id();
            if let Some(buffer) = self.buffers.get(buffer_id) {
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clamp the request to this piece's document span.
                let read_start = offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    let data = buffer.get_data()?;

                    // NOTE(review): a piece whose range falls outside its
                    // buffer is silently skipped, so the result may be
                    // shorter than requested without any error signal.
                    if buffer_end <= data.len() {
                        result.extend_from_slice(&data[buffer_start..buffer_end]);
                        collected += bytes_to_read;

                        if collected >= bytes {
                            break;
                        }
                    }
                }
            }
        }

        Some(result)
    }
1381
    /// Reads `bytes` bytes starting at `offset`, loading unloaded
    /// backing data on demand (hence `&mut self`). The end is clamped to
    /// the document length.
    ///
    /// Large unloaded pieces (> `LOAD_CHUNK_SIZE`) are not loaded whole:
    /// the piece is split around a `CHUNK_ALIGNMENT`-aligned window of
    /// at most `LOAD_CHUNK_SIZE` bytes, a dedicated chunk buffer is
    /// created and loaded for just that window, and the scan restarts
    /// because the tree was restructured.
    ///
    /// # Errors
    /// Fails when a buffer is missing, chunk creation/loading fails, a
    /// read falls outside its buffer, or an iteration makes no progress.
    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
        if bytes == 0 {
            return Ok(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = (offset + bytes).min(self.len());
        let mut current_offset = offset;

        while current_offset < end_offset {
            let mut made_progress = false;
            let mut restarted_iteration = false;

            for piece_view in self
                .piece_tree
                .iter_pieces_in_range(current_offset, end_offset)
            {
                let buffer_id = piece_view.location.buffer_id();

                let needs_loading = self
                    .buffers
                    .get(buffer_id)
                    .map(|b| !b.is_loaded())
                    .unwrap_or(false);

                if needs_loading {
                    if piece_view.bytes > LOAD_CHUNK_SIZE {
                        // Load only an aligned window around the cursor.
                        let piece_start_in_doc = piece_view.doc_offset;
                        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

                        // Align the window start down to CHUNK_ALIGNMENT.
                        let chunk_start_in_buffer = (piece_view.buffer_offset + offset_in_piece)
                            / CHUNK_ALIGNMENT
                            * CHUNK_ALIGNMENT;
                        let chunk_bytes = LOAD_CHUNK_SIZE.min(
                            (piece_view.buffer_offset + piece_view.bytes)
                                .saturating_sub(chunk_start_in_buffer),
                        );

                        let chunk_start_offset_in_piece =
                            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
                        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
                        let split_end_in_doc = split_start_in_doc + chunk_bytes;

                        // Carve the window out as its own piece(s).
                        if chunk_start_offset_in_piece > 0 {
                            self.piece_tree
                                .split_at_offset(split_start_in_doc, &self.buffers);
                        }
                        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
                            self.piece_tree
                                .split_at_offset(split_end_in_doc, &self.buffers);
                        }

                        // Create a new buffer backing just the window.
                        let chunk_buffer = self
                            .buffers
                            .get(buffer_id)
                            .context("Buffer not found")?
                            .create_chunk_buffer(
                                self.next_buffer_id,
                                chunk_start_in_buffer,
                                chunk_bytes,
                            )
                            .context("Failed to create chunk buffer")?;

                        self.next_buffer_id += 1;
                        let new_buffer_id = chunk_buffer.id;
                        self.buffers.push(chunk_buffer);

                        // Point the window's piece at the chunk buffer.
                        self.piece_tree.replace_buffer_reference(
                            buffer_id,
                            piece_view.buffer_offset + chunk_start_offset_in_piece,
                            chunk_bytes,
                            BufferLocation::Added(new_buffer_id),
                        );

                        self.buffers
                            .get_mut(new_buffer_id)
                            .context("Chunk buffer not found")?
                            .load(&*self.fs)
                            .context("Failed to load chunk")?;

                        // The tree changed; restart the outer scan.
                        restarted_iteration = true;
                        break;
                    } else {
                        // Small enough: load the whole buffer in place.
                        self.buffers
                            .get_mut(buffer_id)
                            .context("Buffer not found")?
                            .load(&*self.fs)
                            .context("Failed to load buffer")?;
                    }
                }

                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clamp the read to this piece's document span.
                let read_start = current_offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
                    let data = buffer
                        .get_data()
                        .context("Buffer data unavailable after load")?;

                    anyhow::ensure!(
                        buffer_end <= data.len(),
                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
                        buffer_start,
                        buffer_end,
                        data.len()
                    );

                    result.extend_from_slice(&data[buffer_start..buffer_end]);
                    current_offset = read_end;
                    made_progress = true;
                }
            }

            // Guard against infinite loops on corrupt state: every pass
            // must either read something or restructure the tree.
            if !made_progress && !restarted_iteration {
                tracing::error!(
                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
                tracing::error!(
                    "Piece tree stats: {} total bytes",
                    self.piece_tree.stats().total_bytes
                );
                anyhow::bail!(
                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
            }
        }

        Ok(result)
    }
1554
1555 pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
1568 let estimated_bytes = line_count.saturating_mul(200);
1571
1572 let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
1574 let bytes_to_load = estimated_bytes.min(remaining_bytes);
1575
1576 self.get_text_range_mut(start_offset, bytes_to_load)?;
1579
1580 Ok(())
1581 }
1582
1583 pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
1587 self.get_text_range(0, self.total_bytes())
1588 }
1589
1590 pub(crate) fn get_all_text_string(&self) -> Option<String> {
1594 self.get_all_text()
1595 .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
1596 }
1597
1598 pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
1603 self.get_text_range(range.start, range.end.saturating_sub(range.start))
1604 .unwrap_or_default()
1605 }
1606
1607 pub fn to_string(&self) -> Option<String> {
1610 self.get_all_text_string()
1611 }
1612
    /// Total length of the document in bytes.
    pub fn len(&self) -> usize {
        self.total_bytes()
    }
1617
1618 pub fn is_empty(&self) -> bool {
1620 self.total_bytes() == 0
1621 }
1622
1623 pub fn file_path(&self) -> Option<&Path> {
1625 self.file_path.as_deref()
1626 }
1627
    /// Associates the buffer with `path` (used by subsequent saves).
    pub fn set_file_path(&mut self, path: PathBuf) {
        self.file_path = Some(path);
    }
1632
    /// Detaches the buffer from its backing file path.
    pub fn clear_file_path(&mut self) {
        self.file_path = None;
    }
1639
1640 pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
1644 let old_size = self.total_bytes();
1645 if new_size <= old_size {
1646 return;
1647 }
1648
1649 let additional_bytes = new_size - old_size;
1650
1651 let buffer_id = self.next_buffer_id;
1653 self.next_buffer_id += 1;
1654
1655 let new_buffer = StringBuffer::new_unloaded(
1656 buffer_id,
1657 source_path.to_path_buf(),
1658 old_size, additional_bytes, );
1661 self.buffers.push(new_buffer);
1662
1663 self.piece_tree.insert(
1665 old_size,
1666 BufferLocation::Stored(buffer_id),
1667 0,
1668 additional_bytes,
1669 None, &self.buffers,
1671 );
1672 }
1673
    /// True when the buffer has unsaved changes.
    pub fn is_modified(&self) -> bool {
        self.modified
    }
1678
    /// Marks the buffer as having no unsaved changes (e.g. after a save).
    pub fn clear_modified(&mut self) {
        self.modified = false;
    }
1683
    /// Explicitly sets the modified flag.
    pub fn set_modified(&mut self, modified: bool) {
        self.modified = modified;
    }
1689
    /// Returns the recovery-pending flag (see [`set_recovery_pending`]).
    pub fn is_recovery_pending(&self) -> bool {
        self.recovery_pending
    }
1694
    /// Sets the recovery-pending flag.
    pub fn set_recovery_pending(&mut self, pending: bool) {
        self.recovery_pending = pending;
    }
1699
    /// True when the buffer is flagged as a large file.
    pub fn is_large_file(&self) -> bool {
        self.large_file
    }
1704
    /// Size of the file as last recorded, if known.
    ///
    /// NOTE(review): backed by `saved_file_size` — confirm whether this is
    /// the size at load time or at the most recent save.
    pub fn original_file_size(&self) -> Option<usize> {
        self.saved_file_size
    }
1713
1714 pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
1723 use crate::model::piece_tree::BufferLocation;
1724
1725 let mut chunks = Vec::new();
1726 let total = self.total_bytes();
1727
1728 let mut stored_bytes_before = 0;
1734
1735 for piece in self.piece_tree.iter_pieces_in_range(0, total) {
1736 match piece.location {
1737 BufferLocation::Stored(_) => {
1738 stored_bytes_before += piece.bytes;
1740 }
1741 BufferLocation::Added(buffer_id) => {
1742 if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
1743 if let Some(data) = buffer.get_data() {
1745 let start = piece.buffer_offset;
1747 let end = start + piece.bytes;
1748 if end <= data.len() {
1749 chunks.push((stored_bytes_before, data[start..end].to_vec()));
1753 }
1754 }
1755 }
1756 }
1757 }
1758 }
1759
1760 chunks
1761 }
1762
    /// True when the content was detected as binary (see [`detect_binary`]).
    pub fn is_binary(&self) -> bool {
        self.is_binary
    }
1767
    /// The line ending currently used by the buffer.
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }
1772
    /// Sets the line ending used for subsequent saves and marks the buffer
    /// as modified (changing the EOL style is a content change).
    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.mark_content_modified();
    }
1781
    /// Sets both the current and the original line ending without marking
    /// the buffer modified — intended for initializing a buffer.
    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.original_line_ending = line_ending;
    }
1790
1791 pub fn detect_binary(bytes: &[u8]) -> bool {
1799 let check_len = bytes.len().min(8 * 1024);
1801 let sample = &bytes[..check_len];
1802
1803 let mut i = 0;
1804 while i < sample.len() {
1805 let byte = sample[i];
1806
1807 if byte == 0x1B && i + 1 < sample.len() {
1810 let next = sample[i + 1];
1811 if next == b'[' || next == b']' {
1812 i += 2;
1814 while i < sample.len() {
1815 let c = sample[i];
1816 if (0x40..=0x7E).contains(&c) {
1818 break;
1819 }
1820 i += 1;
1821 }
1822 i += 1;
1823 continue;
1824 }
1825 }
1826
1827 if byte == 0x00 {
1829 return true;
1830 }
1831
1832 if byte < 0x20
1837 && byte != 0x09
1838 && byte != 0x0A
1839 && byte != 0x0D
1840 && byte != 0x0C
1841 && byte != 0x0B
1842 && byte != 0x1B
1843 {
1844 return true;
1845 }
1846
1847 if byte == 0x7F {
1849 return true;
1850 }
1851
1852 i += 1;
1853 }
1854
1855 false
1856 }
1857
1858 pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
1863 let check_len = bytes.len().min(8 * 1024);
1865 let sample = &bytes[..check_len];
1866
1867 let mut crlf_count = 0;
1868 let mut lf_only_count = 0;
1869 let mut cr_only_count = 0;
1870
1871 let mut i = 0;
1872 while i < sample.len() {
1873 if sample[i] == b'\r' {
1874 if i + 1 < sample.len() && sample[i + 1] == b'\n' {
1876 crlf_count += 1;
1877 i += 2; continue;
1879 } else {
1880 cr_only_count += 1;
1882 }
1883 } else if sample[i] == b'\n' {
1884 lf_only_count += 1;
1886 }
1887 i += 1;
1888 }
1889
1890 if crlf_count > lf_only_count && crlf_count > cr_only_count {
1892 LineEnding::CRLF
1893 } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
1894 LineEnding::CR
1895 } else {
1896 LineEnding::LF
1898 }
1899 }
1900
1901 #[allow(dead_code)] pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
1908 let mut normalized = Vec::with_capacity(bytes.len());
1909 let mut i = 0;
1910
1911 while i < bytes.len() {
1912 if bytes[i] == b'\r' {
1913 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
1915 normalized.push(b'\n');
1917 i += 2; continue;
1919 } else {
1920 normalized.push(b'\n');
1922 }
1923 } else {
1924 normalized.push(bytes[i]);
1926 }
1927 i += 1;
1928 }
1929
1930 normalized
1931 }
1932
1933 fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
1938 let mut normalized = Vec::with_capacity(bytes.len());
1940 let mut i = 0;
1941 while i < bytes.len() {
1942 if bytes[i] == b'\r' {
1943 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
1945 normalized.push(b'\n');
1947 i += 2;
1948 continue;
1949 } else {
1950 normalized.push(b'\n');
1952 }
1953 } else {
1954 normalized.push(bytes[i]);
1955 }
1956 i += 1;
1957 }
1958
1959 if target_ending == LineEnding::LF {
1961 return normalized;
1962 }
1963
1964 let replacement = target_ending.as_str().as_bytes();
1966 let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
1967
1968 for byte in normalized {
1969 if byte == b'\n' {
1970 result.extend_from_slice(replacement);
1971 } else {
1972 result.push(byte);
1973 }
1974 }
1975
1976 result
1977 }
1978
1979 pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
1981 let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
1982
1983 let bytes = if let Some(end_offset) = end {
1984 end_offset.saturating_sub(start)
1985 } else {
1986 self.total_bytes().saturating_sub(start)
1987 };
1988
1989 self.get_text_range(start, bytes)
1990 }
1991
1992 pub fn line_start_offset(&self, line: usize) -> Option<usize> {
1994 let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
1995 Some(start)
1996 }
1997
    /// Returns information about the piece containing `offset`, if any.
    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
        self.piece_tree.find_by_offset(offset)
    }
2002
    /// Returns aggregate statistics of the underlying piece tree.
    pub fn stats(&self) -> TreeStats {
        self.piece_tree.stats()
    }
2007
2008 pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
2012 if pattern.is_empty() {
2013 return None;
2014 }
2015
2016 let pattern_bytes = pattern.as_bytes();
2017 let buffer_len = self.len();
2018
2019 if start_pos < buffer_len {
2021 if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
2022 return Some(offset);
2023 }
2024 }
2025
2026 if start_pos > 0 {
2028 if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
2029 return Some(offset);
2030 }
2031 }
2032
2033 None
2034 }
2035
2036 pub fn find_next_in_range(
2040 &self,
2041 pattern: &str,
2042 start_pos: usize,
2043 range: Option<Range<usize>>,
2044 ) -> Option<usize> {
2045 if pattern.is_empty() {
2046 return None;
2047 }
2048
2049 if let Some(search_range) = range {
2050 let pattern_bytes = pattern.as_bytes();
2052 let search_start = start_pos.max(search_range.start);
2053 let search_end = search_range.end.min(self.len());
2054
2055 if search_start < search_end {
2056 self.find_pattern(search_start, search_end, pattern_bytes)
2057 } else {
2058 None
2059 }
2060 } else {
2061 self.find_next(pattern, start_pos)
2063 }
2064 }
2065
    /// Scans `[start, end)` for the first occurrence of `pattern` using
    /// chunked, overlapping reads so large buffers are never fully
    /// materialized.
    ///
    /// The overlap equals `pattern.len() - 1` (at least 1), so a match that
    /// straddles a chunk boundary is fully contained in some chunk.
    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
        if pattern.is_empty() || start >= end {
            return None;
        }

        // 64 KiB chunks; the overlap re-reads the previous chunk's tail.
        const CHUNK_SIZE: usize = 65536; let overlap = pattern.len().saturating_sub(1).max(1);

        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);

        for chunk in chunks {
            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
                let match_end = pos + pattern.len();
                // Ignore a match that ends inside the overlap region: those
                // bytes were already visible to the previous chunk.
                // NOTE(review): only the first match per chunk is examined;
                // rejecting it skips the rest of the chunk — confirm the
                // overlap guarantees no later match is lost.
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + pos;
                    // Guard against matches running past the requested end.
                    if absolute_pos + pattern.len() <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
2096
2097 fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
2099 if needle.is_empty() || needle.len() > haystack.len() {
2100 return None;
2101 }
2102
2103 (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
2104 }
2105
2106 pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
2108 let buffer_len = self.len();
2109
2110 if start_pos < buffer_len {
2112 if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
2113 return Some(offset);
2114 }
2115 }
2116
2117 if start_pos > 0 {
2119 if let Some(offset) = self.find_regex(0, start_pos, regex) {
2120 return Some(offset);
2121 }
2122 }
2123
2124 None
2125 }
2126
2127 pub fn find_next_regex_in_range(
2129 &self,
2130 regex: &Regex,
2131 start_pos: usize,
2132 range: Option<Range<usize>>,
2133 ) -> Option<usize> {
2134 if let Some(search_range) = range {
2135 let search_start = start_pos.max(search_range.start);
2136 let search_end = search_range.end.min(self.len());
2137
2138 if search_start < search_end {
2139 self.find_regex(search_start, search_end, regex)
2140 } else {
2141 None
2142 }
2143 } else {
2144 self.find_next_regex(regex, start_pos)
2145 }
2146 }
2147
    /// Scans `[start, end)` for the first regex match using 1 MiB chunks
    /// with a 4 KiB overlap.
    ///
    /// NOTE(review): a match longer than the 4 KiB overlap that straddles a
    /// chunk boundary can be missed — confirm this bound is acceptable.
    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
        if start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 1048576; const OVERLAP: usize = 4096; let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);

        for chunk in chunks {
            if let Some(mat) = regex.find(&chunk.buffer) {
                let match_end = mat.end();
                // Skip matches ending inside the already-searched overlap.
                // NOTE(review): only the first match per chunk is examined.
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + mat.start();
                    let match_len = mat.end() - mat.start();
                    // Reject matches extending past the requested end.
                    if absolute_pos + match_len <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
2180
2181 pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
2183 if range.start >= self.len() {
2184 return false;
2185 }
2186
2187 let end = range.end.min(self.len());
2188 if end > range.start {
2189 self.delete_bytes(range.start, end - range.start);
2190 }
2191
2192 if !replacement.is_empty() {
2193 self.insert(range.start, replacement);
2194 }
2195
2196 true
2197 }
2198
2199 pub fn replace_next(
2201 &mut self,
2202 pattern: &str,
2203 replacement: &str,
2204 start_pos: usize,
2205 range: Option<Range<usize>>,
2206 ) -> Option<usize> {
2207 if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
2208 self.replace_range(pos..pos + pattern.len(), replacement);
2209 Some(pos)
2210 } else {
2211 None
2212 }
2213 }
2214
2215 pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
2217 if pattern.is_empty() {
2218 return 0;
2219 }
2220
2221 let mut count = 0;
2222 let mut pos = 0;
2223
2224 while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
2228 self.replace_range(found_pos..found_pos + pattern.len(), replacement);
2229 count += 1;
2230
2231 pos = found_pos + replacement.len();
2233
2234 if pos >= self.len() {
2236 break;
2237 }
2238 }
2239
2240 count
2241 }
2242
    /// Replaces every match of `regex` with `replacement`, scanning left to
    /// right. Returns the number of replacements.
    ///
    /// NOTE(review): each iteration re-reads the entire tail of the buffer
    /// (`found_pos..len`), which is O(n²) over many matches — consider a
    /// bounded read if this shows up in profiles.
    pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
        let mut count = 0;
        let mut pos = 0;

        while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
            // Re-run the regex on the tail to learn the match length.
            let text = self
                .get_text_range_mut(found_pos, self.len() - found_pos)
                .context("Failed to read text for regex match")?;

            if let Some(mat) = regex.find(&text) {
                // NOTE(review): assumes the re-found match starts at offset 0
                // of `text` (i.e. exactly at `found_pos`), since `mat.len()`
                // alone sizes the replaced range — confirm.
                self.replace_range(found_pos..found_pos + mat.len(), replacement);
                count += 1;
                // Resume after the inserted replacement text.
                pos = found_pos + replacement.len();

                if pos >= self.len() {
                    break;
                }
            } else {
                break;
            }
        }

        Ok(count)
    }
2269
2270 pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
2274 self.offset_to_position(byte_pos)
2275 .map(|pos| (pos.line, pos.column))
2276 .unwrap_or_else(|| (byte_pos / 80, 0)) }
2278
2279 pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
2283 if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
2284 let line_len = if let Some(end_offset) = end {
2286 end_offset.saturating_sub(start)
2287 } else {
2288 self.total_bytes().saturating_sub(start)
2289 };
2290 let byte_offset = character.min(line_len);
2291 start + byte_offset
2292 } else {
2293 self.len()
2295 }
2296 }
2297
2298 pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
2301 let (line, column_bytes) = self
2302 .offset_to_position(byte_pos)
2303 .map(|pos| (pos.line, pos.column))
2304 .unwrap_or_else(|| (byte_pos / 80, 0)); if let Some(line_bytes) = self.get_line(line) {
2308 let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
2310 let text_str = String::from_utf8_lossy(text_before);
2311 let utf16_offset = text_str.encode_utf16().count();
2312 (line, utf16_offset)
2313 } else {
2314 (line, 0)
2315 }
2316 }
2317
2318 pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
2322 if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
2323 let line_len = if let Some(end_offset) = end {
2325 end_offset.saturating_sub(line_start)
2326 } else {
2327 self.total_bytes().saturating_sub(line_start)
2328 };
2329
2330 if line_len > 0 {
2331 let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
2333 return line_start;
2334 };
2335 let line_str = String::from_utf8_lossy(&line_bytes);
2336
2337 let mut utf16_count = 0;
2339 let mut byte_offset = 0;
2340
2341 for ch in line_str.chars() {
2342 if utf16_count >= utf16_offset {
2343 break;
2344 }
2345 utf16_count += ch.len_utf16();
2346 byte_offset += ch.len_utf8();
2347 }
2348
2349 line_start + byte_offset
2350 } else {
2351 line_start
2352 }
2353 } else {
2354 self.len()
2356 }
2357 }
2358
2359 pub fn prev_char_boundary(&self, pos: usize) -> usize {
2363 if pos == 0 {
2364 return 0;
2365 }
2366
2367 let start = pos.saturating_sub(4);
2369 let Some(bytes) = self.get_text_range(start, pos - start) else {
2370 return pos;
2372 };
2373
2374 for i in (0..bytes.len()).rev() {
2376 let byte = bytes[i];
2377 if (byte & 0b1100_0000) != 0b1000_0000 {
2379 return start + i;
2380 }
2381 }
2382
2383 pos.saturating_sub(1)
2385 }
2386
2387 pub fn next_char_boundary(&self, pos: usize) -> usize {
2389 let len = self.len();
2390 if pos >= len {
2391 return len;
2392 }
2393
2394 let end = (pos + 5).min(len);
2396 let Some(bytes) = self.get_text_range(pos, end - pos) else {
2397 return pos;
2399 };
2400
2401 for (i, &byte) in bytes.iter().enumerate().skip(1) {
2403 if (byte & 0b1100_0000) != 0b1000_0000 {
2405 return pos + i;
2406 }
2407 }
2408
2409 end
2411 }
2412
2413 #[inline]
2417 fn is_utf8_continuation_byte(byte: u8) -> bool {
2418 (byte & 0b1100_0000) == 0b1000_0000
2419 }
2420
2421 pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
2425 let len = self.len();
2426 if pos == 0 || pos >= len {
2427 return pos.min(len);
2428 }
2429
2430 let Some(bytes) = self.get_text_range(pos, 1) else {
2432 return pos;
2434 };
2435
2436 if !Self::is_utf8_continuation_byte(bytes[0]) {
2438 return pos;
2440 }
2441
2442 self.prev_char_boundary(pos)
2444 }
2445
    /// Returns the previous grapheme-cluster boundary before `pos`
    /// (Unicode-aware, unlike [`prev_char_boundary`]).
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        // Read a small window before `pos`; 32 bytes covers common grapheme
        // clusters (combining marks, emoji ZWJ sequences, etc.).
        let raw_start = pos.saturating_sub(32);
        let start = if raw_start == 0 {
            0
        } else {
            // Snap the window start onto a char boundary so the slice can be
            // decoded as UTF-8.
            self.prev_char_boundary(raw_start + 1)
        };

        let Some(bytes) = self.get_text_range(start, pos - start) else {
            // Unreadable window: fall back to a plain char boundary.
            return self.prev_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Partially invalid UTF-8: use the valid prefix if any,
                // otherwise fall back to a char boundary.
                // NOTE(review): `rel_pos` below may exceed `text.len()` on
                // this path — confirm the grapheme helper clamps it.
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.prev_char_boundary(pos),
                }
            }
        };

        let rel_pos = pos - start;
        let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

        // Landing exactly on the window start may mean the true boundary is
        // earlier, so retry with an earlier window.
        if new_rel_pos == 0 && start > 0 {
            return self.prev_grapheme_boundary(start);
        }

        start + new_rel_pos
    }
2498
    /// Returns the next grapheme-cluster boundary after `pos`
    /// (Unicode-aware, unlike [`next_char_boundary`]).
    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
        let len = self.len();
        if pos >= len {
            return len;
        }

        // A 32-byte window covers the largest common grapheme clusters.
        let end = (pos + 32).min(len);
        let Some(bytes) = self.get_text_range(pos, end - pos) else {
            // Unreadable window: fall back to a plain char boundary.
            return self.next_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Partially invalid UTF-8: use the valid prefix if any,
                // otherwise fall back to a char boundary.
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.next_char_boundary(pos),
                }
            }
        };

        // First grapheme boundary after the start of the window.
        let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
        pos + new_rel_pos
    }
2537
2538 pub fn prev_word_boundary(&self, pos: usize) -> usize {
2540 if pos == 0 {
2541 return 0;
2542 }
2543
2544 let start = pos.saturating_sub(256).max(0);
2546 let Some(bytes) = self.get_text_range(start, pos - start) else {
2547 return pos;
2549 };
2550 let text = String::from_utf8_lossy(&bytes);
2551
2552 let mut found_word_char = false;
2553 let chars: Vec<char> = text.chars().collect();
2554
2555 for i in (0..chars.len()).rev() {
2556 let ch = chars[i];
2557 let is_word_char = ch.is_alphanumeric() || ch == '_';
2558
2559 if found_word_char && !is_word_char {
2560 let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
2563 return start + byte_offset;
2564 }
2565
2566 if is_word_char {
2567 found_word_char = true;
2568 }
2569 }
2570
2571 0
2572 }
2573
2574 pub fn next_word_boundary(&self, pos: usize) -> usize {
2576 let len = self.len();
2577 if pos >= len {
2578 return len;
2579 }
2580
2581 let end = (pos + 256).min(len);
2583 let Some(bytes) = self.get_text_range(pos, end - pos) else {
2584 return pos;
2586 };
2587 let text = String::from_utf8_lossy(&bytes);
2588
2589 let mut found_word_char = false;
2590 let mut byte_offset = 0;
2591
2592 for ch in text.chars() {
2593 let is_word_char = ch.is_alphanumeric() || ch == '_';
2594
2595 if found_word_char && !is_word_char {
2596 return pos + byte_offset;
2598 }
2599
2600 if is_word_char {
2601 found_word_char = true;
2602 }
2603
2604 byte_offset += ch.len_utf8();
2605 }
2606
2607 len
2608 }
2609
    /// Creates a [`LineIterator`] positioned at `byte_pos`.
    ///
    /// `estimated_line_length` is a sizing hint forwarded to the iterator.
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }
2621
    /// Creates an iterator over at most `max_lines` lines starting at
    /// `byte_pos`. Fails if the starting data cannot be prepared.
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
2642
2643 pub fn get_line_number(&self, byte_offset: usize) -> usize {
2656 self.offset_to_position(byte_offset)
2657 .map(|pos| pos.line)
2658 .unwrap_or_else(|| {
2659 byte_offset / 80
2661 })
2662 }
2663
    /// Legacy shim: line positions are tracked by the piece tree, so there
    /// is no separate cache to populate. Returns the line number at
    /// `start_byte`; `_line_count` is ignored.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        self.get_line_number(start_byte)
    }
2702
    /// Legacy shim: delegates to [`line_start_offset`]; there is no separate
    /// line cache anymore.
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }
2707
    /// No-op: the piece tree tracks line data directly, so there is no
    /// cache to invalidate. Retained for API compatibility.
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
    }
2712
    /// No-op: insertions need no cache maintenance. Retained for API
    /// compatibility.
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
    }
2717
    /// No-op: deletions need no cache maintenance. Retained for API
    /// compatibility.
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
    }
2722
    /// No-op: there is no line cache to clear. Retained for API
    /// compatibility.
    pub fn clear_line_cache(&mut self) {
    }
2727
2728 #[cfg(test)]
2732 pub fn from_str_test(s: &str) -> Self {
2733 Self::from_bytes(
2734 s.as_bytes().to_vec(),
2735 std::sync::Arc::new(crate::model::filesystem::StdFileSystem),
2736 )
2737 }
2738
2739 #[cfg(test)]
2741 pub fn new_test() -> Self {
2742 Self::empty(std::sync::Arc::new(crate::model::filesystem::StdFileSystem))
2743 }
2744}
2745
/// Historical alias: older call sites refer to the text buffer as `Buffer`.
pub type Buffer = TextBuffer;

// Re-export so callers can name the iterator without the deep module path.
pub use crate::primitives::line_iterator::LineIterator;
2751
/// One window of document bytes produced by [`OverlappingChunks`].
#[derive(Debug)]
pub struct ChunkInfo {
    /// The chunk's bytes: previous-chunk overlap followed by fresh data.
    pub buffer: Vec<u8>,

    /// Document offset of `buffer[0]`.
    pub absolute_pos: usize,

    /// Index into `buffer` where not-yet-searched data begins; bytes before
    /// this were already covered by the previous chunk.
    pub valid_start: usize,
}
2769
/// Streams a byte range of a [`TextBuffer`] as fixed-size chunks where each
/// chunk re-reads the tail of its predecessor, so matches that straddle a
/// chunk boundary are still fully contained in one chunk.
pub struct OverlappingChunks<'a> {
    // Piece-tree cursor over the requested document range.
    piece_iter: PieceRangeIter,
    buffers: &'a [StringBuffer],

    // Bytes of the chunk currently being assembled/served.
    buffer: Vec<u8>,
    // Document offset of `buffer[0]`.
    buffer_absolute_pos: usize,

    // Next document offset to read from.
    current_pos: usize,
    // Exclusive end of the requested range.
    end_pos: usize,

    // Target number of fresh bytes per chunk.
    chunk_size: usize,
    // Number of trailing bytes carried over into the next chunk.
    overlap: usize,

    // True until the first chunk has been filled.
    first_chunk: bool,

    // Bytes of the piece currently being consumed, plus a read offset.
    current_piece_data: Option<Vec<u8>>,
    current_piece_offset: usize,
}
2820
impl<'a> OverlappingChunks<'a> {
    /// Creates an iterator over `[start, end)` of `text_buffer` yielding
    /// chunks of `chunk_size` fresh bytes, each prefixed with `overlap`
    /// bytes carried over from the previous chunk.
    pub fn new(
        text_buffer: &'a TextBuffer,
        start: usize,
        end: usize,
        chunk_size: usize,
        overlap: usize,
    ) -> Self {
        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);

        Self {
            piece_iter,
            buffers: &text_buffer.buffers,
            buffer: Vec::with_capacity(chunk_size + overlap),
            buffer_absolute_pos: start,
            current_pos: start,
            end_pos: end,
            chunk_size,
            overlap,
            first_chunk: true,
            current_piece_data: None,
            current_piece_offset: 0,
        }
    }

    /// Pulls the next byte from the current piece, loading the next piece's
    /// bytes when the current one is exhausted.
    ///
    /// NOTE(review): if a piece's buffer is unloaded or out of bounds, this
    /// returns `None` immediately instead of trying subsequent pieces, which
    /// ends iteration early — confirm that is intended.
    fn read_byte(&mut self) -> Option<u8> {
        loop {
            // Serve from the currently cached piece slice, if any.
            if let Some(ref data) = self.current_piece_data {
                if self.current_piece_offset < data.len() {
                    let byte = data[self.current_piece_offset];
                    self.current_piece_offset += 1;
                    self.current_pos += 1;
                    return Some(byte);
                } else {
                    // Current piece exhausted; drop it and fetch the next.
                    self.current_piece_data = None;
                    self.current_piece_offset = 0;
                }
            }

            if let Some(piece_view) = self.piece_iter.next() {
                let buffer_id = piece_view.location.buffer_id();
                if let Some(buffer) = self.buffers.get(buffer_id) {
                    let piece_start_in_doc = piece_view.doc_offset;
                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                    // Clip the piece to the unread portion of the range.
                    let read_start = self.current_pos.max(piece_start_in_doc);
                    let read_end = self.end_pos.min(piece_end_in_doc);

                    if read_end > read_start {
                        let offset_in_piece = read_start - piece_start_in_doc;
                        let bytes_to_read = read_end - read_start;

                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
                        let buffer_end = buffer_start + bytes_to_read;

                        if let Some(data) = buffer.get_data() {
                            if buffer_end <= data.len() {
                                // Cache this piece's bytes and retry the loop.
                                self.current_piece_data =
                                    Some(data[buffer_start..buffer_end].to_vec());
                                self.current_piece_offset = 0;
                                continue;
                            }
                        }
                    }
                }
            }

            return None;
        }
    }

    /// Advances the internal buffer to the next chunk.
    ///
    /// The first chunk is filled to `chunk_size`; later calls drain all but
    /// the trailing `overlap` bytes and refill up to `overlap + chunk_size`.
    /// Returns `false` when no new data could be added.
    fn fill_next_chunk(&mut self) -> bool {
        if self.first_chunk {
            self.first_chunk = false;
            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }
            !self.buffer.is_empty()
        } else {
            if self.current_pos >= self.end_pos {
                return false;
            }

            // Keep only the last `overlap` bytes as the carried-over prefix.
            if self.buffer.len() > self.overlap {
                let drain_amount = self.buffer.len() - self.overlap;
                self.buffer.drain(0..drain_amount);
                self.buffer_absolute_pos += drain_amount;
            }

            // Refill with fresh bytes; success requires actual growth.
            let before_len = self.buffer.len();
            let target_len = self.overlap + self.chunk_size;
            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }

            self.buffer.len() > before_len
        }
    }
}
2956
impl<'a> Iterator for OverlappingChunks<'a> {
    type Item = ChunkInfo;

    fn next(&mut self) -> Option<Self::Item> {
        // Before any bytes have been consumed the two offsets coincide,
        // which identifies the first chunk (whose bytes are all fresh).
        let is_first = self.buffer_absolute_pos == self.current_pos;

        if !self.fill_next_chunk() {
            return None;
        }

        // Bytes before `valid_start` were already part of the previous
        // chunk and have been searched once.
        let valid_start = if is_first {
            0
        } else {
            self.overlap.min(self.buffer.len())
        };

        Some(ChunkInfo {
            buffer: self.buffer.clone(),
            absolute_pos: self.buffer_absolute_pos,
            valid_start,
        })
    }
}
2983
2984#[cfg(test)]
2985mod tests {
2986 use crate::model::filesystem::StdFileSystem;
2987 use std::sync::Arc;
2988
    // Shared helper: the real filesystem wrapped in the trait object the
    // buffer constructors expect.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }
2992 use super::*;
2993
2994 #[test]
2995 fn test_empty_buffer() {
2996 let buffer = TextBuffer::empty(test_fs());
2997 assert_eq!(buffer.total_bytes(), 0);
2998 assert_eq!(buffer.line_count(), Some(1)); }
3000
3001 #[test]
3002 fn test_line_positions_multiline() {
3003 let buffer = TextBuffer::from_bytes(b"Hello\nNew Line\nWorld!".to_vec(), test_fs());
3004
3005 assert_eq!(buffer.line_count(), Some(3));
3007
3008 assert_eq!(buffer.line_start_offset(0), Some(0)); assert_eq!(buffer.line_start_offset(1), Some(6)); assert_eq!(buffer.line_start_offset(2), Some(15)); assert_eq!(buffer.offset_to_position(0).unwrap().line, 0); assert_eq!(buffer.offset_to_position(5).unwrap().line, 0); assert_eq!(buffer.offset_to_position(6).unwrap().line, 1); assert_eq!(buffer.offset_to_position(14).unwrap().line, 1); assert_eq!(buffer.offset_to_position(15).unwrap().line, 2); assert_eq!(buffer.line_col_to_position(0, 5), 5); assert_eq!(buffer.line_col_to_position(1, 0), 6); assert_eq!(buffer.line_col_to_position(1, 8), 14); assert_eq!(buffer.line_col_to_position(2, 0), 15); }
3026
3027 #[test]
3028 fn test_new_from_content() {
3029 let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
3030 assert_eq!(buffer.total_bytes(), 11);
3031 assert_eq!(buffer.line_count(), Some(2));
3032 }
3033
3034 #[test]
3035 fn test_get_all_text() {
3036 let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
3037 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld");
3038 }
3039
3040 #[test]
3041 fn test_insert_at_start() {
3042 let mut buffer = TextBuffer::from_bytes(b"world".to_vec(), test_fs());
3043 buffer.insert_bytes(0, b"hello ".to_vec());
3044
3045 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
3046 assert_eq!(buffer.total_bytes(), 11);
3047 }
3048
3049 #[test]
3050 fn test_insert_in_middle() {
3051 let mut buffer = TextBuffer::from_bytes(b"helloworld".to_vec(), test_fs());
3052 buffer.insert_bytes(5, b" ".to_vec());
3053
3054 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
3055 assert_eq!(buffer.total_bytes(), 11);
3056 }
3057
3058 #[test]
3059 fn test_insert_at_end() {
3060 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
3061 buffer.insert_bytes(5, b" world".to_vec());
3062
3063 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
3064 assert_eq!(buffer.total_bytes(), 11);
3065 }
3066
3067 #[test]
3068 fn test_insert_with_newlines() {
3069 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
3070 buffer.insert_bytes(5, b"\nworld\ntest".to_vec());
3071
3072 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld\ntest");
3073 assert_eq!(buffer.line_count(), Some(3));
3074 }
3075
3076 #[test]
3077 fn test_delete_from_start() {
3078 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
3079 buffer.delete_bytes(0, 6);
3080
3081 assert_eq!(buffer.get_all_text().unwrap(), b"world");
3082 assert_eq!(buffer.total_bytes(), 5);
3083 }
3084
3085 #[test]
3086 fn test_delete_from_middle() {
3087 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
3088 buffer.delete_bytes(5, 1);
3089
3090 assert_eq!(buffer.get_all_text().unwrap(), b"helloworld");
3091 assert_eq!(buffer.total_bytes(), 10);
3092 }
3093
3094 #[test]
3095 fn test_delete_from_end() {
3096 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
3097 buffer.delete_bytes(6, 5);
3098
3099 assert_eq!(buffer.get_all_text().unwrap(), b"hello ");
3100 assert_eq!(buffer.total_bytes(), 6);
3101 }
3102
3103 #[test]
3104 fn test_delete_with_newlines() {
3105 let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
3106 buffer.delete_bytes(5, 7); assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
3109 assert_eq!(buffer.line_count(), Some(1));
3110 }
3111
3112 #[test]
3113 fn test_offset_position_conversions() {
3114 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
3115
3116 let pos = buffer.offset_to_position(0);
3117 assert_eq!(pos, Some(Position { line: 0, column: 0 }));
3118
3119 let pos = buffer.offset_to_position(6);
3120 assert_eq!(pos, Some(Position { line: 1, column: 0 }));
3121
3122 let offset = buffer.position_to_offset(Position { line: 1, column: 0 });
3123 assert_eq!(offset, 6);
3124 }
3125
3126 #[test]
3127 fn test_insert_at_position() {
3128 let mut buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec(), test_fs());
3129 buffer.insert_at_position(Position { line: 1, column: 0 }, b"beautiful ".to_vec());
3130
3131 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nbeautiful world");
3132 }
3133
3134 #[test]
3135 fn test_delete_range() {
3136 let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
3137
3138 let start = Position { line: 0, column: 5 };
3139 let end = Position { line: 2, column: 0 };
3140 buffer.delete_range(start, end);
3141
3142 assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
3143 }
3144
3145 #[test]
3146 fn test_get_line() {
3147 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
3148
3149 assert_eq!(buffer.get_line(0), Some(b"hello\n".to_vec()));
3150 assert_eq!(buffer.get_line(1), Some(b"world\n".to_vec()));
3151 assert_eq!(buffer.get_line(2), Some(b"test".to_vec()));
3152 assert_eq!(buffer.get_line(3), None);
3153 }
3154
3155 #[test]
3156 fn test_multiple_operations() {
3157 let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());
3158
3159 buffer.insert_bytes(0, b"start\n".to_vec());
3160 assert_eq!(buffer.line_count(), Some(4));
3161
3162 buffer.delete_bytes(6, 6); assert_eq!(buffer.line_count(), Some(3));
3164
3165 buffer.insert_bytes(6, b"new\n".to_vec());
3166 assert_eq!(buffer.line_count(), Some(4));
3167
3168 let text = buffer.get_all_text().unwrap();
3169 assert_eq!(text, b"start\nnew\nline2\nline3");
3170 }
3171
3172 #[test]
3173 fn test_get_text_range() {
3174 let buffer = TextBuffer::from_bytes(b"hello world".to_vec(), test_fs());
3175
3176 assert_eq!(buffer.get_text_range(0, 5), Some(b"hello".to_vec()));
3177 assert_eq!(buffer.get_text_range(6, 5), Some(b"world".to_vec()));
3178 assert_eq!(buffer.get_text_range(0, 11), Some(b"hello world".to_vec()));
3179 }
3180
3181 #[test]
3182 fn test_empty_operations() {
3183 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec(), test_fs());
3184
3185 buffer.insert_bytes(2, Vec::new());
3186 assert_eq!(buffer.get_all_text().unwrap(), b"hello");
3187
3188 buffer.delete_bytes(2, 0);
3189 assert_eq!(buffer.get_all_text().unwrap(), b"hello");
3190 }
3191
3192 #[test]
3193 fn test_sequential_inserts_at_beginning() {
3194 let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
3196
3197 buffer.delete_bytes(0, 12);
3199 assert_eq!(buffer.get_all_text().unwrap(), b"");
3200
3201 buffer.insert_bytes(0, vec![b'a']);
3203 assert_eq!(buffer.get_all_text().unwrap(), b"a");
3204
3205 buffer.insert_bytes(0, vec![b'b']);
3207 assert_eq!(buffer.get_all_text().unwrap(), b"ba");
3208 }
3209
    /// Tests for lazy (chunk-based) loading of large files and for the
    /// `Option`-returning line metadata that lazy loading implies.
    mod large_file_support {
        use super::*;
        use crate::model::piece_tree::StringBuffer;
        use std::fs::File;
        use std::io::Write;
        use tempfile::TempDir;

        #[test]
        fn test_line_feed_count_is_some_for_loaded_buffer() {
            // An eagerly-built buffer knows its newline count
            // ("hello\nworld\ntest" contains two '\n' bytes).
            let buffer = StringBuffer::new(0, b"hello\nworld\ntest".to_vec());
            assert_eq!(buffer.line_feed_count(), Some(2));
        }

        #[test]
        fn test_line_feed_count_is_none_for_unloaded_buffer() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            // An unloaded buffer has no data in memory, so no line metadata.
            let buffer = StringBuffer::new_unloaded(0, file_path, 0, 100);
            assert_eq!(buffer.line_feed_count(), None);
        }

        #[test]
        fn test_line_count_is_some_for_small_buffer() {
            let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec(), test_fs());
            assert_eq!(buffer.line_count(), Some(3));
        }

        #[test]
        fn test_piece_tree_works_with_none_line_count() {
            // Loading without line starts means no line-feed count...
            let buffer = StringBuffer::new_loaded(0, b"hello\nworld".to_vec(), false);
            assert_eq!(buffer.line_feed_count(), None);

            use crate::model::piece_tree::{BufferLocation, PieceTree};
            // ...and a tree built with `None` line info reports no line count.
            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, None);

            assert_eq!(tree.line_count(), None);
        }

        #[test]
        fn test_buffer_data_loaded_variant() {
            // Loaded with line starts: data and line metadata both available.
            let data = b"hello world".to_vec();
            let buffer = StringBuffer::new_loaded(0, data.clone(), true);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert!(buffer.get_line_starts().is_some());
        }

        #[test]
        fn test_buffer_data_loaded_without_line_starts() {
            // Loaded but with line-start computation skipped.
            let data = b"hello\nworld".to_vec();
            let buffer = StringBuffer::new_loaded(0, data.clone(), false);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert_eq!(buffer.get_line_starts(), None);
        }

        #[test]
        fn test_buffer_data_unloaded_variant() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            // Unloaded: neither data nor line starts are available.
            let buffer = StringBuffer::new_unloaded(0, file_path.clone(), 0, 100);

            assert!(!buffer.is_loaded());
            assert_eq!(buffer.get_data(), None);
            assert_eq!(buffer.get_line_starts(), None);
        }

        #[test]
        fn test_buffer_load_method() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            let test_data = b"hello world";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Starts unloaded, pointing at the on-disk range.
            let mut buffer = StringBuffer::new_unloaded(0, file_path, 0, test_data.len());
            assert!(!buffer.is_loaded());

            // Explicit load() pulls the bytes in through the filesystem.
            let fs = crate::model::filesystem::StdFileSystem;
            buffer.load(&fs).unwrap();

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&test_data[..]));
        }

        #[test]
        fn test_string_buffer_new_vs_new_loaded() {
            let data = b"hello\nworld".to_vec();

            // `new` computes line starts eagerly.
            let buf1 = StringBuffer::new(0, data.clone());
            assert!(buf1.is_loaded());
            assert!(buf1.get_line_starts().is_some());
            assert_eq!(buf1.line_feed_count(), Some(1));

            // `new_loaded(.., false)` keeps the data but skips line metadata.
            let buf2 = StringBuffer::new_loaded(0, data.clone(), false);
            assert!(buf2.is_loaded());
            assert_eq!(buf2.get_line_starts(), None);
            assert_eq!(buf2.line_feed_count(), None);
        }

        #[test]
        fn test_load_small_file_eager_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("small.txt");

            let test_data = b"hello\ntest";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Threshold 0 — presumably a sentinel for "use the default
            // threshold" (other tests pass 0 and expect small-file mode);
            // TODO confirm against load_from_file.
            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();

            assert!(!buffer.large_file);
            assert_eq!(buffer.total_bytes(), test_data.len());
            assert_eq!(buffer.line_count(), Some(2));
            assert_eq!(buffer.get_all_text().unwrap(), test_data);

            // Eager load: the backing buffer holds its data in memory.
            assert!(buffer.buffers[0].is_loaded());
        }

        #[test]
        fn test_load_large_file_lazy_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large.txt");

            // 16 bytes with a 10-byte threshold triggers large-file mode.
            let test_data = b"hello\nworld\ntest";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            let buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), test_data.len());

            // Lazy mode: line metadata is unknown until regions are loaded.
            assert_eq!(buffer.line_count(), None);

            // And no bytes are in memory yet.
            assert!(!buffer.buffers[0].is_loaded());
            assert_eq!(buffer.buffers[0].get_data(), None);
        }

        #[test]
        fn test_issue_657_search_on_large_file_unloaded_buffer() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_search_test.txt");

            let test_data = b"line1\nline2\nSEARCH_TARGET\nline4\nline5";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Tiny threshold forces lazy mode for this small file.
            let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

            assert!(buffer.large_file, "Buffer should be in large file mode");
            assert!(
                !buffer.buffers[0].is_loaded(),
                "Buffer should be unloaded initially"
            );

            // Immutable accessor refuses while regions are unloaded.
            assert!(
                buffer.to_string().is_none(),
                "BUG REPRODUCED: to_string() returns None for unloaded buffer"
            );

            // The mutable accessor faults the region in on demand.
            let total_bytes = buffer.len();
            let content = buffer.get_text_range_mut(0, total_bytes).unwrap();
            let content_str = String::from_utf8_lossy(&content);

            assert!(
                content_str.contains("SEARCH_TARGET"),
                "FIX WORKS: get_text_range_mut() loaded the buffer and found the search target"
            );

            // Once loaded, the immutable accessor works too.
            assert!(
                buffer.to_string().is_some(),
                "After get_text_range_mut(), to_string() should work"
            );
        }

        #[test]
        fn test_large_file_threshold_boundary() {
            let temp_dir = TempDir::new().unwrap();

            // Exactly at the threshold (100 bytes, threshold 100) -> large.
            let file_path = temp_dir.path().join("at_threshold.txt");
            let test_data = vec![b'x'; 100];
            File::create(&file_path)
                .unwrap()
                .write_all(&test_data)
                .unwrap();

            let buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
            assert!(buffer.large_file);

            // One byte below the threshold -> small.
            let file_path2 = temp_dir.path().join("below_threshold.txt");
            let test_data2 = vec![b'x'; 99];
            File::create(&file_path2)
                .unwrap()
                .write_all(&test_data2)
                .unwrap();

            let buffer2 = TextBuffer::load_from_file(&file_path2, 100, test_fs()).unwrap();
            assert!(!buffer2.large_file);
        }

        #[test]
        fn test_large_file_default_threshold() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            File::create(&file_path)
                .unwrap()
                .write_all(b"hello")
                .unwrap();

            // Threshold 0 with a 5-byte file: expected to resolve to the
            // default threshold, so this stays a small file.
            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();

            assert!(!buffer.large_file);
        }

        #[test]
        fn test_large_file_has_correct_piece_tree_structure() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large.txt");

            let test_data = b"hello world";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            let buffer = TextBuffer::load_from_file(&file_path, 5, test_fs()).unwrap();

            assert_eq!(buffer.total_bytes(), test_data.len());

            // A freshly-opened large file is represented by a single
            // (unloaded) buffer covering the whole file.
            assert_eq!(buffer.buffers.len(), 1);

            assert!(!buffer.buffers[0].is_loaded());
        }

        #[test]
        fn test_empty_large_file() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("empty.txt");

            // Zero-byte file.
            File::create(&file_path).unwrap();

            let buffer = TextBuffer::load_from_file(&file_path, 0, test_fs()).unwrap();

            assert_eq!(buffer.total_bytes(), 0);
            assert!(buffer.is_empty());
        }

        #[test]
        fn test_large_file_basic_api_operations() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_test.txt");

            let test_data = b"line1\nline2\nline3\nline4\n";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 10, test_fs()).unwrap();

            // Size queries work without loading; line count does not.
            assert!(buffer.large_file);
            assert_eq!(buffer.line_count(), None);
            assert_eq!(buffer.total_bytes(), test_data.len());
            assert!(!buffer.is_empty());
            assert_eq!(buffer.len(), test_data.len());

            // Range reads fault the data in.
            let range_result = buffer.get_text_range_mut(0, 5).unwrap();
            assert_eq!(range_result, b"line1");

            let range_result2 = buffer.get_text_range_mut(6, 5).unwrap();
            assert_eq!(range_result2, b"line2");

            // After the reads above the buffer is loaded, so get_all_text
            // succeeds here.
            let all_text = buffer.get_all_text().unwrap();
            assert_eq!(all_text, test_data);

            assert_eq!(buffer.slice_bytes(0..5), b"line1");

            // Insert at the front ("prefix_" = 7 bytes).
            buffer.insert_bytes(0, b"prefix_".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 7);
            assert!(buffer.is_modified());

            let text_after_insert = buffer.get_all_text().unwrap();
            assert_eq!(&text_after_insert[0..7], b"prefix_");
            assert_eq!(&text_after_insert[7..12], b"line1");

            // Deleting the prefix restores the original content.
            buffer.delete_bytes(0, 7);
            assert_eq!(buffer.total_bytes(), test_data.len());

            let text_after_delete = buffer.get_all_text().unwrap();
            assert_eq!(text_after_delete, test_data);

            // Append at the very end.
            let end_offset = buffer.total_bytes();
            buffer.insert_bytes(end_offset, b"suffix".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 6);

            let final_text = buffer.get_all_text().unwrap();
            assert!(final_text.ends_with(b"suffix"));
            assert_eq!(&final_text[0..test_data.len()], test_data);

            // Position conversions at offset 0 are always well-defined.
            let pos = buffer.offset_to_position(0).unwrap();
            assert_eq!(pos.column, 0);

            let offset = buffer.position_to_offset(Position { line: 0, column: 0 });
            assert_eq!(offset, 0);

            // replace_range reports success via its bool return.
            let replace_result = buffer.replace_range(0..5, "START");
            assert!(replace_result);

            let text_after_replace = buffer.get_all_text().unwrap();
            assert!(text_after_replace.starts_with(b"START"));
        }

        #[test]
        fn test_large_file_chunk_based_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("huge.txt");

            // Three LOAD_CHUNK_SIZE regions of distinct bytes so any
            // chunk-boundary mixup shows up as the wrong letter.
            let chunk_size = LOAD_CHUNK_SIZE;
            let file_size = chunk_size * 3;
            let mut file = File::create(&file_path).unwrap();
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.write_all(&vec![b'C'; chunk_size]).unwrap();
            file.flush().unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Nothing is read from disk until a range is requested.
            assert!(!buffer.buffers[0].is_loaded());

            // Read 1 KiB from the start of each region.
            let first_chunk_data = buffer.get_text_range_mut(0, 1024).unwrap();
            assert_eq!(first_chunk_data.len(), 1024);
            assert!(first_chunk_data.iter().all(|&b| b == b'A'));

            let second_chunk_data = buffer.get_text_range_mut(chunk_size, 1024).unwrap();
            assert_eq!(second_chunk_data.len(), 1024);
            assert!(second_chunk_data.iter().all(|&b| b == b'B'));

            let third_chunk_data = buffer.get_text_range_mut(chunk_size * 2, 1024).unwrap();
            assert_eq!(third_chunk_data.len(), 1024);
            assert!(third_chunk_data.iter().all(|&b| b == b'C'));

            // A read straddling the A/B boundary must see both regions.
            let cross_chunk_offset = chunk_size - 512;
            let cross_chunk_data = buffer.get_text_range_mut(cross_chunk_offset, 1024).unwrap();
            assert_eq!(cross_chunk_data.len(), 1024);
            assert!(cross_chunk_data[..512].iter().all(|&b| b == b'A'));
            assert!(cross_chunk_data[512..].iter().all(|&b| b == b'B'));

            // Loading should have split the file into multiple chunk buffers.
            assert!(
                buffer.buffers.len() > 1,
                "Expected multiple buffers after chunk-based loading, got {}",
                buffer.buffers.len()
            );

            // Edits still work on a partially-loaded buffer.
            buffer.insert_bytes(0, b"PREFIX".to_vec());
            assert_eq!(buffer.total_bytes(), file_size + 6);

            let after_insert = buffer.get_text_range_mut(0, 6).unwrap();
            assert_eq!(after_insert, b"PREFIX");

            let after_prefix = buffer.get_text_range_mut(6, 10).unwrap();
            assert!(after_prefix.iter().all(|&b| b == b'A'));

            // Fresh load: scan the whole file in 64 KiB reads and check every
            // byte against the region it belongs to.
            let mut buffer2 = TextBuffer::load_from_file(&file_path, 1, test_fs()).unwrap();

            let chunk_read_size = 64 * 1024;
            let mut offset = 0;
            while offset < file_size {
                let bytes_to_read = chunk_read_size.min(file_size - offset);
                let chunk_data = buffer2.get_text_range_mut(offset, bytes_to_read).unwrap();

                let first_mb_end = chunk_size;
                let second_mb_end = chunk_size * 2;

                for (i, &byte) in chunk_data.iter().enumerate() {
                    let file_offset = offset + i;
                    let expected = if file_offset < first_mb_end {
                        b'A'
                    } else if file_offset < second_mb_end {
                        b'B'
                    } else {
                        b'C'
                    };
                    assert_eq!(
                        byte, expected,
                        "Mismatch at file offset {}: expected {}, got {}",
                        file_offset, expected as char, byte as char
                    );
                }

                offset += bytes_to_read;
            }
        }

        #[test]
        fn test_large_file_incremental_save() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_save_test.txt");

            // Two 1000-byte regions; only the first will ever be loaded.
            let chunk_size = 1000;
            let file_size = chunk_size * 2;
            let mut file = File::create(&file_path).unwrap();
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.flush().unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 100, test_fs()).unwrap();
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Touch only the start of the file.
            let first_bytes = buffer.get_text_range_mut(0, 50).unwrap();
            assert!(first_bytes.iter().all(|&b| b == b'A'));

            // "PREFIX_" = 7 bytes inserted at the front.
            buffer.insert_bytes(0, b"PREFIX_".to_vec());

            let save_path = temp_dir.path().join("saved.txt");
            buffer.save_to_file(&save_path).unwrap();

            let saved_content = std::fs::read(&save_path).unwrap();

            assert_eq!(
                saved_content.len(),
                file_size + 7,
                "Saved file should be {} bytes, got {}",
                file_size + 7,
                saved_content.len()
            );

            assert_eq!(&saved_content[..7], b"PREFIX_", "Should start with PREFIX_");

            assert!(
                saved_content[7..100].iter().all(|&b| b == b'A'),
                "First chunk after prefix should be A's"
            );

            // The never-loaded tail must survive the save untouched.
            let second_chunk_start = 7 + chunk_size;
            assert!(
                saved_content[second_chunk_start..second_chunk_start + 100]
                    .iter()
                    .all(|&b| b == b'B'),
                "Second chunk should be B's (was unloaded, should be preserved)"
            );
        }

        #[test]
        fn test_large_file_save_with_multiple_edits() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("multi_edit.txt");

            // 100 numbered lines so edits at known offsets are easy to verify.
            let mut content = Vec::new();
            for i in 0..100 {
                content.extend_from_slice(
                    format!("Line {:04}: padding to make it longer\n", i).as_bytes(),
                );
            }
            let original_len = content.len();
            std::fs::write(&file_path, &content).unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 500, test_fs()).unwrap();
            assert!(
                buffer.line_count().is_none(),
                "Should be in large file mode"
            );

            // Edit at the start ("[START]" = 7 bytes)...
            buffer.insert_bytes(0, b"[START]".to_vec());

            // ...and in the middle. Offsets are shifted by the 7 bytes already
            // inserted; the read forces that region to load first.
            let mid_offset = original_len / 2;
            let _mid_bytes = buffer.get_text_range_mut(mid_offset + 7, 10).unwrap();
            buffer.insert_bytes(mid_offset + 7, b"[MIDDLE]".to_vec());

            let save_path = temp_dir.path().join("multi_edit_saved.txt");
            buffer.save_to_file(&save_path).unwrap();

            let saved = std::fs::read_to_string(&save_path).unwrap();

            assert!(
                saved.starts_with("[START]Line 0000"),
                "Should start with our edit"
            );
            assert!(saved.contains("[MIDDLE]"), "Should contain middle edit");
            assert!(saved.contains("Line 0099"), "Should preserve end of file");

            // 7 bytes for "[START]" + 8 bytes for "[MIDDLE]".
            let expected_len = original_len + 7 + 8;
            assert_eq!(
                saved.len(),
                expected_len,
                "Length should be original + edits"
            );
        }
    }
3838
3839 #[test]
3843 fn test_offset_to_position_simple() {
3844 let content = b"a\nb\nc\nd";
3850 let buffer = TextBuffer::from_bytes(content.to_vec(), test_fs());
3851
3852 let pos = buffer
3854 .offset_to_position(0)
3855 .expect("small buffer should have line metadata");
3856 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3857 assert_eq!(pos.column, 0);
3858
3859 let pos = buffer
3860 .offset_to_position(1)
3861 .expect("small buffer should have line metadata");
3862 assert_eq!(pos.line, 0, "Byte 1 (newline) should be on line 0");
3863 assert_eq!(pos.column, 1);
3864
3865 let pos = buffer
3866 .offset_to_position(2)
3867 .expect("small buffer should have line metadata");
3868 assert_eq!(pos.line, 1, "Byte 2 should be on line 1");
3869 assert_eq!(pos.column, 0);
3870
3871 let pos = buffer
3872 .offset_to_position(3)
3873 .expect("small buffer should have line metadata");
3874 assert_eq!(pos.line, 1, "Byte 3 (newline) should be on line 1");
3875 assert_eq!(pos.column, 1);
3876
3877 let pos = buffer
3878 .offset_to_position(4)
3879 .expect("small buffer should have line metadata");
3880 assert_eq!(pos.line, 2, "Byte 4 should be on line 2");
3881 assert_eq!(pos.column, 0);
3882
3883 let pos = buffer
3884 .offset_to_position(6)
3885 .expect("small buffer should have line metadata");
3886 assert_eq!(pos.line, 3, "Byte 6 should be on line 3");
3887 assert_eq!(pos.column, 0);
3888 }
3889
3890 #[test]
3891 fn test_offset_to_position_after_insert() {
3892 let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
3894
3895 buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
3897
3898 let pos = buffer
3904 .offset_to_position(0)
3905 .expect("small buffer should have line metadata");
3906 assert_eq!(pos.line, 0, "Byte 0 should still be on line 0");
3907
3908 let pos = buffer
3909 .offset_to_position(2)
3910 .expect("small buffer should have line metadata");
3911 assert_eq!(
3912 pos.line, 1,
3913 "Byte 2 (start of inserted line) should be on line 1"
3914 );
3915
3916 let pos = buffer
3917 .offset_to_position(4)
3918 .expect("small buffer should have line metadata");
3919 assert_eq!(
3920 pos.line, 2,
3921 "Byte 4 (start of 'b') should be on line 2 after insert"
3922 );
3923 }
3924
3925 #[test]
3926 fn test_offset_to_position_empty_lines() {
3927 let buffer = TextBuffer::from_bytes(b"\n\n\n".to_vec(), test_fs());
3929
3930 let pos = buffer
3936 .offset_to_position(0)
3937 .expect("small buffer should have line metadata");
3938 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3939
3940 let pos = buffer
3941 .offset_to_position(1)
3942 .expect("small buffer should have line metadata");
3943 assert_eq!(pos.line, 1, "Byte 1 should be on line 1");
3944
3945 let pos = buffer
3946 .offset_to_position(2)
3947 .expect("small buffer should have line metadata");
3948 assert_eq!(pos.line, 2, "Byte 2 should be on line 2");
3949
3950 let pos = buffer
3951 .offset_to_position(3)
3952 .expect("small buffer should have line metadata");
3953 assert_eq!(pos.line, 3, "Byte 3 (EOF) should be on line 3");
3954 }
3955
3956 #[test]
3957 fn test_offset_to_position_long_lines() {
3958 let mut content = Vec::new();
3960 content.extend_from_slice(b"aaaaaaaaaa\n"); content.extend_from_slice(b"bbbbbbbbbb\n"); content.extend_from_slice(b"cccccccccc"); let buffer = TextBuffer::from_bytes(content.clone(), test_fs());
3965
3966 let pos = buffer
3968 .offset_to_position(0)
3969 .expect("small buffer should have line metadata");
3970 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3971 assert_eq!(pos.column, 0);
3972
3973 let pos = buffer
3974 .offset_to_position(11)
3975 .expect("small buffer should have line metadata");
3976 assert_eq!(pos.line, 1, "Byte 11 (start of line 1) should be on line 1");
3977 assert_eq!(pos.column, 0);
3978
3979 let pos = buffer
3980 .offset_to_position(22)
3981 .expect("small buffer should have line metadata");
3982 assert_eq!(pos.line, 2, "Byte 22 (start of line 2) should be on line 2");
3983 assert_eq!(pos.column, 0);
3984
3985 let pos = buffer
3987 .offset_to_position(5)
3988 .expect("small buffer should have line metadata");
3989 assert_eq!(pos.line, 0, "Byte 5 should be on line 0");
3990 assert_eq!(pos.column, 5);
3991
3992 let pos = buffer
3993 .offset_to_position(16)
3994 .expect("small buffer should have line metadata");
3995 assert_eq!(pos.line, 1, "Byte 16 should be on line 1");
3996 assert_eq!(pos.column, 5);
3997 }
3998
    #[test]
    fn test_line_iterator_with_offset_to_position() {
        let mut buffer = TextBuffer::from_bytes(b"line0\nline1\nline2\n".to_vec(), test_fs());

        // Invariant: a LineIterator created at any byte offset (including the
        // one-past-end offset) must start at the beginning of the line that
        // offset_to_position() reports for that offset.
        for byte_pos in 0..=buffer.len() {
            // 80 is the wrap/viewport width argument — presumably irrelevant
            // to the starting position; TODO confirm in line_iterator().
            let iter = buffer.line_iterator(byte_pos, 80);
            let iter_pos = iter.current_position();
            // Ground truth via the position-conversion API.
            let expected_line = buffer
                .offset_to_position(byte_pos)
                .expect("small buffer should have line metadata")
                .line;
            let expected_line_start = buffer.position_to_offset(Position {
                line: expected_line,
                column: 0,
            });

            assert_eq!(
                iter_pos, expected_line_start,
                "LineIterator at byte {} should position at line start {} but got {}",
                byte_pos, expected_line_start, iter_pos
            );
        }
    }
4024
4025 #[test]
4026 fn test_piece_tree_line_count_after_insert() {
4027 let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec(), test_fs());
4029
4030 buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
4032
4033 let content = buffer.slice_bytes(0..buffer.len());
4035 let newline_count = content.iter().filter(|&&b| b == b'\n').count();
4036 let expected_line_count = newline_count + 1;
4037 let actual_line_count = buffer.line_count();
4038
4039 assert_eq!(
4040 actual_line_count,
4041 Some(expected_line_count),
4042 "Line count mismatch after insert"
4043 );
4044 }
4045
    #[test]
    fn test_position_to_lsp_position_after_modification() {
        // Initial layout: line 0 = "fn foo(val: i32) {" (19 bytes including
        // '\n'), line 1 = "    val + 1".
        let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
        let mut buffer = TextBuffer::from_bytes(initial.to_vec(), test_fs());

        // Offset 23 = line 1 start (19) + 4 leading spaces.
        let (line, char) = buffer.position_to_lsp_position(23);
        assert_eq!(line, 1, "Initial: position 23 should be on line 1");
        assert_eq!(char, 4, "Initial: position 23 should be at char 4");

        // Rename the body's "val" to "value": delete columns 4..7 on line 1,
        // then insert the replacement at the same byte offset (23).
        buffer.delete_range(
            Position { line: 1, column: 4 },
            Position { line: 1, column: 7 },
        );
        buffer.insert_bytes(23, b"value".to_vec());
        // Rename the parameter "val" the same way on line 0 (columns 7..10,
        // byte offset 7 — just after "fn foo(").
        buffer.delete_range(
            Position { line: 0, column: 7 },
            Position {
                line: 0,
                column: 10,
            },
        );
        buffer.insert_bytes(7, b"value".to_vec());
        let content = String::from_utf8_lossy(&buffer.get_all_text().unwrap()).to_string();
        assert_eq!(content, "fn foo(value: i32) {\n    value + 1\n}\n");

        // Line 0 grew by 2 bytes, so line 1 now starts at 21; offset 25 is
        // again 4 characters into line 1.
        let (line, char) = buffer.position_to_lsp_position(25);
        assert_eq!(
            line, 1,
            "After modification: position 25 should be on line 1"
        );
        assert_eq!(
            char, 4,
            "After modification: position 25 should be at char 4"
        );

        // Offset 21 is exactly the new start of line 1.
        let (line, char) = buffer.position_to_lsp_position(21);
        assert_eq!(line, 1, "Position 21 should be on line 1");
        assert_eq!(char, 0, "Position 21 should be at char 0 (start of line)");
    }
4107
4108 #[test]
4109 fn test_detect_crlf() {
4110 assert_eq!(
4111 TextBuffer::detect_line_ending(b"hello\r\nworld\r\n"),
4112 LineEnding::CRLF
4113 );
4114 }
4115
4116 #[test]
4117 fn test_detect_lf() {
4118 assert_eq!(
4119 TextBuffer::detect_line_ending(b"hello\nworld\n"),
4120 LineEnding::LF
4121 );
4122 }
4123
4124 #[test]
4125 fn test_normalize_crlf() {
4126 let input = b"hello\r\nworld\r\n".to_vec();
4127 let output = TextBuffer::normalize_line_endings(input);
4128 assert_eq!(output, b"hello\nworld\n");
4129 }
4130
4131 #[test]
4132 fn test_normalize_empty() {
4133 let input = Vec::new();
4134 let output = TextBuffer::normalize_line_endings(input);
4135 assert_eq!(output, Vec::<u8>::new());
4136 }
4137
    #[test]
    fn test_get_all_text_returns_empty_for_unloaded_buffers() {
        use tempfile::TempDir;
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("large_test.txt");

        // A 50 KB file with a 1 KB threshold forces large-file (lazy) mode.
        let original_content = "X".repeat(50_000);
        std::fs::write(&file_path, &original_content).unwrap();

        let mut buffer = TextBuffer::load_from_file(&file_path, 1024, test_fs()).unwrap();
        assert!(buffer.large_file, "Should be in large file mode");
        assert!(!buffer.buffers[0].is_loaded(), "Buffer should be unloaded");

        // An edit ("EDITED: " = 8 bytes) does not force the original
        // region to load.
        buffer.insert_bytes(0, b"EDITED: ".to_vec());

        // The immutable accessor must refuse rather than return partial text.
        let content_immutable = buffer.get_all_text();

        assert!(
            content_immutable.is_none(),
            "get_all_text() should return None for large files with unloaded regions. \
             Got Some({} bytes) instead of None.",
            content_immutable.as_ref().map(|c| c.len()).unwrap_or(0)
        );

        // The mutable accessor may fault regions in, so the full, edited
        // content is still reachable through it.
        let total = buffer.total_bytes();
        let content_lazy = buffer.get_text_range_mut(0, total).unwrap();
        assert_eq!(
            content_lazy.len(),
            50_000 + 8,
            "get_text_range_mut() should return all content with lazy loading"
        );
        assert!(
            String::from_utf8_lossy(&content_lazy).starts_with("EDITED: "),
            "Content should start with our edit"
        );
    }
4188
4189 mod line_ending_conversion {
4192 use super::*;
4193
4194 #[test]
4195 fn test_convert_lf_to_crlf() {
4196 let input = b"Line 1\nLine 2\nLine 3\n";
4197 let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4198 assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
4199 }
4200
4201 #[test]
4202 fn test_convert_crlf_to_lf() {
4203 let input = b"Line 1\r\nLine 2\r\nLine 3\r\n";
4204 let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
4205 assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
4206 }
4207
4208 #[test]
4209 fn test_convert_cr_to_lf() {
4210 let input = b"Line 1\rLine 2\rLine 3\r";
4211 let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
4212 assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
4213 }
4214
4215 #[test]
4216 fn test_convert_mixed_to_crlf() {
4217 let input = b"Line 1\nLine 2\r\nLine 3\r";
4219 let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4220 assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
4221 }
4222
4223 #[test]
4224 fn test_convert_lf_to_lf_is_noop() {
4225 let input = b"Line 1\nLine 2\nLine 3\n";
4226 let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
4227 assert_eq!(result, input.to_vec());
4228 }
4229
4230 #[test]
4231 fn test_convert_empty_content() {
4232 let input = b"";
4233 let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4234 assert_eq!(result, b"".to_vec());
4235 }
4236
4237 #[test]
4238 fn test_convert_no_line_endings() {
4239 let input = b"No line endings here";
4240 let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
4241 assert_eq!(result, b"No line endings here".to_vec());
4242 }
4243
4244 #[test]
4245 fn test_set_line_ending_marks_modified() {
4246 let mut buffer = TextBuffer::from_bytes(b"Hello\nWorld\n".to_vec(), test_fs());
4247 assert!(!buffer.is_modified());
4248
4249 buffer.set_line_ending(LineEnding::CRLF);
4250 assert!(buffer.is_modified());
4251 }
4252
4253 #[test]
4254 fn test_set_default_line_ending_does_not_mark_modified() {
4255 let mut buffer = TextBuffer::empty(test_fs());
4256 assert!(!buffer.is_modified());
4257
4258 buffer.set_default_line_ending(LineEnding::CRLF);
4259 assert!(!buffer.is_modified());
4260 assert_eq!(buffer.line_ending(), LineEnding::CRLF);
4261 }
4262
4263 #[test]
4264 fn test_save_to_file_converts_lf_to_crlf() {
4265 use tempfile::TempDir;
4266
4267 let temp_dir = TempDir::new().unwrap();
4268 let file_path = temp_dir.path().join("test_lf_to_crlf.txt");
4269
4270 let original_content = b"Line 1\nLine 2\nLine 3\n";
4272 std::fs::write(&file_path, original_content).unwrap();
4273
4274 let mut buffer =
4276 TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
4277 .unwrap();
4278 assert_eq!(buffer.line_ending(), LineEnding::LF);
4279
4280 buffer.set_line_ending(LineEnding::CRLF);
4282 assert_eq!(buffer.line_ending(), LineEnding::CRLF);
4283 assert!(buffer.is_modified());
4284
4285 buffer.save_to_file(&file_path).unwrap();
4287
4288 let saved_bytes = std::fs::read(&file_path).unwrap();
4290 assert_eq!(&saved_bytes, b"Line 1\r\nLine 2\r\nLine 3\r\n");
4291 }
4292
4293 #[test]
4294 fn test_save_to_file_converts_crlf_to_lf() {
4295 use tempfile::TempDir;
4296
4297 let temp_dir = TempDir::new().unwrap();
4298 let file_path = temp_dir.path().join("test_crlf_to_lf.txt");
4299
4300 let original_content = b"Line 1\r\nLine 2\r\nLine 3\r\n";
4302 std::fs::write(&file_path, original_content).unwrap();
4303
4304 let mut buffer =
4306 TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD, test_fs())
4307 .unwrap();
4308 assert_eq!(buffer.line_ending(), LineEnding::CRLF);
4309
4310 buffer.set_line_ending(LineEnding::LF);
4312 assert_eq!(buffer.line_ending(), LineEnding::LF);
4313 assert!(buffer.is_modified());
4314
4315 buffer.save_to_file(&file_path).unwrap();
4317
4318 let saved_bytes = std::fs::read(&file_path).unwrap();
4320 assert_eq!(&saved_bytes, b"Line 1\nLine 2\nLine 3\n");
4321 }
4322
        #[test]
        #[cfg(unix)]
        fn test_save_to_unwritable_file() -> anyhow::Result<()> {
            use std::fs::Permissions;
            use std::os::unix::fs::PermissionsExt;
            use tempfile::TempDir;

            let temp_dir = TempDir::new().unwrap();
            let unwritable_dir = temp_dir.path().join("unwritable_dir");
            std::fs::create_dir(&unwritable_dir)?;

            // Create the target file while the directory is still writable.
            let file_path = unwritable_dir.join("unwritable.txt");
            std::fs::write(&file_path, "original content")?;

            // 0o555 = r-xr-xr-x: the directory becomes read-only, so the
            // save's rename/replace step will be denied.
            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;

            let mut buffer = TextBuffer::from_bytes(b"new content".to_vec(), test_fs());
            let result = buffer.save_to_file(&file_path);

            // save_to_file is expected to surface a SudoSaveRequired error
            // carrying the temp file it staged the content into.
            match result {
                Err(e) => {
                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
                        assert_eq!(sudo_err.dest_path, file_path);
                        assert!(sudo_err.temp_path.exists());
                        // Best-effort cleanup of the staged temp file.
                        let _ = std::fs::remove_file(&sudo_err.temp_path);
                    } else {
                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
                    }
                }
                Ok(_) => panic!("Expected error, but save succeeded"),
            }

            Ok(())
        }
4360
4361 #[test]
4362 #[cfg(unix)]
4363 fn test_save_to_unwritable_directory() -> anyhow::Result<()> {
4364 use std::fs::Permissions;
4365 use std::os::unix::fs::PermissionsExt;
4366 use tempfile::TempDir;
4367
4368 let temp_dir = TempDir::new().unwrap();
4369 let unwritable_dir = temp_dir.path().join("unwritable_dir");
4370 std::fs::create_dir(&unwritable_dir)?;
4371
4372 let file_path = unwritable_dir.join("test.txt");
4373
4374 std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;
4376
4377 let mut buffer = TextBuffer::from_bytes(b"content".to_vec(), test_fs());
4378 let result = buffer.save_to_file(&file_path);
4379
4380 match result {
4381 Err(e) => {
4382 if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
4383 assert_eq!(sudo_err.dest_path, file_path);
4384 assert!(sudo_err.temp_path.exists());
4385 assert!(sudo_err.temp_path.starts_with(std::env::temp_dir()));
4387 let _ = std::fs::remove_file(&sudo_err.temp_path);
4389 } else {
4390 panic!("Expected SudoSaveRequired error, got: {:?}", e);
4391 }
4392 }
4393 Ok(_) => panic!("Expected error, but save succeeded"),
4394 }
4395
4396 Ok(())
4397 }
4398 }
4399}
4400
4401#[cfg(test)]
4402mod property_tests {
4403 use crate::model::filesystem::StdFileSystem;
4404 use std::sync::Arc;
4405
    /// Returns the real (std) filesystem implementation shared by these
    /// property tests; tests that touch disk do so via tempfile directories.
    fn test_fs() -> Arc<dyn crate::model::filesystem::FileSystem + Send + Sync> {
        Arc::new(StdFileSystem)
    }
4409 use super::*;
4410 use proptest::prelude::*;
4411
4412 fn text_with_newlines() -> impl Strategy<Value = Vec<u8>> {
4414 prop::collection::vec(
4415 prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n'),],
4416 0..100,
4417 )
4418 }
4419
    /// A randomized edit applied to the buffer under test.
    #[derive(Debug, Clone)]
    enum Operation {
        /// Insert `text` at byte `offset` (tests clamp the offset before use).
        Insert { offset: usize, text: Vec<u8> },
        /// Delete `bytes` bytes starting at `offset` (tests clamp both).
        Delete { offset: usize, bytes: usize },
    }
4426
4427 fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
4428 prop::collection::vec(
4429 prop_oneof![
4430 (0usize..200, text_with_newlines())
4431 .prop_map(|(offset, text)| { Operation::Insert { offset, text } }),
4432 (0usize..200, 1usize..50)
4433 .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
4434 ],
4435 0..50,
4436 )
4437 }
4438
    proptest! {
        // line_count() must equal the number of '\n' bytes plus one.
        #[test]
        fn prop_line_count_consistent(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let newline_count = text.iter().filter(|&&b| b == b'\n').count();
            prop_assert_eq!(buffer.line_count(), Some(newline_count + 1));
        }

        // Round-trip: the bytes loaded in must come back out unchanged.
        #[test]
        fn prop_get_all_text_matches_original(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // Inserting n bytes grows total_bytes by exactly n.
        #[test]
        fn prop_insert_increases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let initial_bytes = buffer.total_bytes();

            // Clamp so the insert offset is always in range.
            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());

            prop_assert_eq!(buffer.total_bytes(), initial_bytes + insert_text.len());
        }

        // Deleting n bytes shrinks total_bytes by exactly n.
        #[test]
        fn prop_delete_decreases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            delete_bytes in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let mut buffer = TextBuffer::from_bytes(text, test_fs());
            let initial_bytes = buffer.total_bytes();

            // Clamp offset and length so the deletion stays in bounds.
            let offset = offset.min(buffer.total_bytes());
            let delete_bytes = delete_bytes.min(buffer.total_bytes() - offset);

            if delete_bytes == 0 {
                return Ok(());
            }

            buffer.delete_bytes(offset, delete_bytes);

            prop_assert_eq!(buffer.total_bytes(), initial_bytes - delete_bytes);
        }

        // Insert then delete of the same span restores the original content.
        #[test]
        fn prop_insert_then_delete_restores_original(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());
            buffer.delete_bytes(offset, insert_text.len());

            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }

        // offset -> position -> offset is the identity for every valid offset.
        #[test]
        fn prop_offset_position_roundtrip(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());

            for offset in 0..text.len() {
                let pos = buffer.offset_to_position(offset).expect("offset_to_position should succeed for valid offset");
                let back = buffer.position_to_offset(pos);
                prop_assert_eq!(back, offset, "Failed roundtrip for offset {}", offset);
            }
        }

        // get_text_range returns exactly the requested slice of the content.
        #[test]
        fn prop_get_text_range_valid(
            text in text_with_newlines(),
            offset in 0usize..100,
            length in 1usize..50
        ) {
            if text.is_empty() {
                return Ok(());
            }

            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let offset = offset.min(buffer.total_bytes());
            let length = length.min(buffer.total_bytes() - offset);

            if length == 0 {
                return Ok(());
            }

            let result = buffer.get_text_range(offset, length);
            prop_assert_eq!(result, Some(text[offset..offset + length].to_vec()));
        }

        // Random edits against the buffer are mirrored against a plain Vec<u8>
        // model; both must agree at the end.
        #[test]
        fn prop_operations_maintain_consistency(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec(), test_fs());
            let mut expected_text = b"initial\ntext".to_vec();

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text.clone());

                        // Mirror the clamped insert into the model.
                        let offset = offset.min(expected_text.len());
                        expected_text.splice(offset..offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let bytes = bytes.min(buffer.total_bytes() - offset);
                            buffer.delete_bytes(offset, bytes);

                            // Mirror the clamped delete into the model.
                            if offset < expected_text.len() {
                                let bytes = bytes.min(expected_text.len() - offset);
                                expected_text.drain(offset..offset + bytes);
                            }
                        }
                    }
                }
            }

            prop_assert_eq!(buffer.get_all_text().unwrap(), expected_text);
        }

        // line_count() never drops below 1, even after arbitrary edits
        // (an empty buffer still has one line).
        #[test]
        fn prop_line_count_never_zero(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                prop_assert!(buffer.line_count().unwrap_or(1) >= 1);
            }
        }

        // Sanity bound on buffer size after random edits. (total_bytes is a
        // usize so it can't literally go negative; an underflow would instead
        // show up as a huge value, which this bound catches.)
        #[test]
        fn prop_total_bytes_never_negative(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"test".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                prop_assert!(buffer.total_bytes() < 10_000_000);
            }
        }

        // After every edit, a position looked up via the line index must map
        // back to an offset that is still inside the piece tree.
        #[test]
        fn prop_piece_tree_and_line_index_stay_synced(operations in operation_strategy()) {
            let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec(), test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        buffer.delete_bytes(offset, bytes);
                    }
                }

                if buffer.total_bytes() > 0 {
                    // Probe the middle of the buffer for a round-trip.
                    let mid_offset = buffer.total_bytes() / 2;
                    if let Some(pos) = buffer.offset_to_position(mid_offset) {
                        let back = buffer.position_to_offset(pos);

                        prop_assert!(back <= buffer.total_bytes());
                    }
                }
            }
        }

        // Replaying a pristine buffer's write recipe reproduces its content.
        #[test]
        fn prop_write_recipe_matches_content(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone(), test_fs());
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            let output = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(output, text, "Recipe output should match original content");
        }

        // Replaying the recipe after arbitrary edits reproduces the edited
        // buffer content.
        #[test]
        fn prop_write_recipe_after_edits(
            initial_text in text_with_newlines(),
            operations in operation_strategy()
        ) {
            let mut buffer = TextBuffer::from_bytes(initial_text, test_fs());

            for op in operations {
                match op {
                    Operation::Insert { offset, text } => {
                        let offset = offset.min(buffer.total_bytes());
                        buffer.insert_bytes(offset, text);
                    }
                    Operation::Delete { offset, bytes } => {
                        if offset < buffer.total_bytes() {
                            let bytes = bytes.min(buffer.total_bytes() - offset);
                            if bytes > 0 {
                                buffer.delete_bytes(offset, bytes);
                            }
                        }
                    }
                }
            }

            let expected = buffer.get_all_text().unwrap();
            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");
            let output = apply_recipe(&buffer, &recipe);

            prop_assert_eq!(output, expected, "Recipe output should match buffer content after edits");
        }

        // A file-backed buffer with one insert should yield a recipe whose
        // Copy ops (referencing the original file) replay correctly.
        #[test]
        fn prop_write_recipe_copy_ops_valid(
            text in prop::collection::vec(prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n')], 10..200),
            edit_offset in 0usize..100,
            edit_text in text_with_newlines()
        ) {
            use tempfile::TempDir;

            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");
            std::fs::write(&file_path, &text).unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 1024 * 1024, test_fs()).unwrap();

            let edit_offset = edit_offset.min(buffer.total_bytes());
            buffer.insert_bytes(edit_offset, edit_text.clone());

            let recipe = buffer.build_write_recipe().expect("build_write_recipe should succeed");

            let expected = buffer.get_all_text().unwrap();
            let output = apply_recipe(&buffer, &recipe);
            prop_assert_eq!(output, expected, "Recipe with Copy ops should match buffer content");

            if text.len() > 100 && edit_offset > 10 {
                // Informational only: whether Copy ops appeared is not asserted,
                // since small files may legitimately be rewritten wholesale.
                let has_copy = recipe.actions.iter().any(|a| matches!(a, RecipeAction::Copy { .. }));
                let _ = has_copy;
            }
        }
    }
4724
4725 fn apply_recipe(buffer: &TextBuffer, recipe: &WriteRecipe) -> Vec<u8> {
4727 let mut output = Vec::new();
4728 for action in &recipe.actions {
4729 match action {
4730 RecipeAction::Copy { offset, len } => {
4731 if let Some(src_path) = &recipe.src_path {
4732 let data = buffer
4733 .fs
4734 .read_range(src_path, *offset, *len as usize)
4735 .expect("read_range should succeed for Copy op");
4736 output.extend_from_slice(&data);
4737 } else {
4738 panic!("Copy action without source path");
4739 }
4740 }
4741 RecipeAction::Insert { index } => {
4742 output.extend_from_slice(&recipe.insert_data[*index]);
4743 }
4744 }
4745 }
4746 output
4747 }
4748
4749 #[test]
4750 fn test_detect_binary_text_files() {
4751 assert!(!TextBuffer::detect_binary(b"Hello, world!"));
4753 assert!(!TextBuffer::detect_binary(b"Line 1\nLine 2\nLine 3"));
4754 assert!(!TextBuffer::detect_binary(b"Tabs\tand\tnewlines\n"));
4755 assert!(!TextBuffer::detect_binary(b"Carriage return\r\n"));
4756
4757 assert!(!TextBuffer::detect_binary(b""));
4759
4760 assert!(!TextBuffer::detect_binary(b"\x1b[31mRed text\x1b[0m"));
4762 }
4763
4764 #[test]
4765 fn test_detect_binary_binary_files() {
4766 assert!(TextBuffer::detect_binary(b"Hello\x00World"));
4768 assert!(TextBuffer::detect_binary(b"\x00"));
4769
4770 assert!(TextBuffer::detect_binary(b"Text with \x01 control char"));
4772 assert!(TextBuffer::detect_binary(b"\x02\x03\x04"));
4773
4774 assert!(TextBuffer::detect_binary(b"Text with DEL\x7F"));
4776 }
4777
4778 #[test]
4779 fn test_detect_binary_png_file() {
4780 let png_header: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
4783 assert!(TextBuffer::detect_binary(png_header));
4784
4785 let mut png_data = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
4787 png_data.extend_from_slice(b"\x00\x00\x00\x0DIHDR"); assert!(TextBuffer::detect_binary(&png_data));
4789 }
4790
4791 #[test]
4792 fn test_detect_binary_other_image_formats() {
4793 let jpeg_header: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
4795 assert!(TextBuffer::detect_binary(jpeg_header));
4796
4797 let gif_data: &[u8] = &[
4800 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, ];
4806 assert!(TextBuffer::detect_binary(gif_data));
4808
4809 let bmp_header: &[u8] = &[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00];
4811 assert!(TextBuffer::detect_binary(bmp_header));
4812 }
4813
4814 #[test]
4815 fn test_detect_binary_executable_formats() {
4816 let elf_header: &[u8] = &[0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00];
4818 assert!(TextBuffer::detect_binary(elf_header));
4819
4820 let macho_header: &[u8] = &[0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
4822 assert!(TextBuffer::detect_binary(macho_header));
4823
4824 let pe_header: &[u8] = &[0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00];
4826 assert!(TextBuffer::detect_binary(pe_header));
4827 }
4828}
4829
/// One line of buffer text as materialized by `TextBufferLineIterator`.
#[derive(Debug, Clone)]
pub struct LineData {
    /// Byte offset of the start of this line within the buffer.
    pub byte_offset: usize,
    /// Line text with the trailing '\n' (if any) stripped; invalid UTF-8
    /// bytes are replaced lossily (U+FFFD).
    pub content: String,
    /// Whether the line was terminated by a '\n' in the buffer.
    pub has_newline: bool,
    /// Line number when the buffer tracks line metadata, `None` otherwise.
    pub line_number: Option<usize>,
}
4842
/// Iterator over a bounded window of buffer lines; all lines are collected
/// eagerly at construction time.
pub struct TextBufferLineIterator {
    /// Lines materialized when the iterator was built.
    lines: Vec<LineData>,
    /// Index of the next entry in `lines` to yield.
    current_index: usize,
    /// True when the buffer still has content past the last collected line.
    pub has_more: bool,
}
4853
4854impl TextBufferLineIterator {
4855 pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
4856 let buffer_len = buffer.len();
4857 if byte_pos >= buffer_len {
4858 return Ok(Self {
4859 lines: Vec::new(),
4860 current_index: 0,
4861 has_more: false,
4862 });
4863 }
4864
4865 let has_line_metadata = buffer.line_count().is_some();
4867
4868 let mut current_line = if has_line_metadata {
4871 buffer.offset_to_position(byte_pos).map(|pos| pos.line)
4872 } else {
4873 None
4874 };
4875
4876 let mut lines = Vec::with_capacity(max_lines);
4877 let mut current_offset = byte_pos;
4878 let estimated_line_length = 80; for _ in 0..max_lines {
4882 if current_offset >= buffer_len {
4883 break;
4884 }
4885
4886 let line_start = current_offset;
4887 let line_number = current_line;
4888
4889 let estimated_max_line_length = estimated_line_length * 3;
4891 let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);
4892
4893 let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;
4895
4896 let mut line_len = 0;
4898 let mut found_newline = false;
4899 for &byte in chunk.iter() {
4900 line_len += 1;
4901 if byte == b'\n' {
4902 found_newline = true;
4903 break;
4904 }
4905 }
4906
4907 if !found_newline && current_offset + line_len < buffer_len {
4909 let remaining = buffer_len - current_offset - line_len;
4911 let additional_bytes = estimated_max_line_length.min(remaining);
4912 let more_chunk =
4913 buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;
4914
4915 let mut extended_chunk = chunk;
4916 extended_chunk.extend_from_slice(&more_chunk);
4917
4918 for &byte in more_chunk.iter() {
4919 line_len += 1;
4920 if byte == b'\n' {
4921 found_newline = true;
4922 break;
4923 }
4924 }
4925
4926 let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
4927 let has_newline = line_string.ends_with('\n');
4928 let content = if has_newline {
4929 line_string[..line_string.len() - 1].to_string()
4930 } else {
4931 line_string
4932 };
4933
4934 lines.push(LineData {
4935 byte_offset: line_start,
4936 content,
4937 has_newline,
4938 line_number,
4939 });
4940
4941 current_offset += line_len;
4942 if has_line_metadata && found_newline {
4943 current_line = current_line.map(|n| n + 1);
4944 }
4945 continue;
4946 }
4947
4948 let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
4950 let has_newline = line_string.ends_with('\n');
4951 let content = if has_newline {
4952 line_string[..line_string.len() - 1].to_string()
4953 } else {
4954 line_string
4955 };
4956
4957 lines.push(LineData {
4958 byte_offset: line_start,
4959 content,
4960 has_newline,
4961 line_number,
4962 });
4963
4964 current_offset += line_len;
4965 if has_line_metadata && found_newline {
4967 current_line = current_line.map(|n| n + 1);
4968 }
4969 }
4970
4971 let has_more = current_offset < buffer_len;
4973
4974 Ok(Self {
4975 lines,
4976 current_index: 0,
4977 has_more,
4978 })
4979 }
4980}
4981
4982impl Iterator for TextBufferLineIterator {
4983 type Item = LineData;
4984
4985 fn next(&mut self) -> Option<Self::Item> {
4986 if self.current_index < self.lines.len() {
4987 let line = self.lines[self.current_index].clone();
4988 self.current_index += 1;
4989 Some(line)
4990 } else {
4991 None
4992 }
4993 }
4994}