1use crate::model::piece_tree::{
4 BufferData, BufferLocation, Cursor, PieceInfo, PieceRangeIter, PieceTree, Position,
5 StringBuffer, TreeStats,
6};
7use crate::model::piece_tree_diff::PieceTreeDiff;
8use crate::primitives::grapheme;
9use anyhow::{Context, Result};
10use regex::bytes::Regex;
11use std::io::{self, Read, Seek, SeekFrom, Write};
12use std::ops::Range;
13use std::path::{Path, PathBuf};
14use std::sync::Arc;
15
16#[cfg(unix)]
17use std::os::unix::fs::MetadataExt;
18
/// Error payload signaling that a save hit a permissions error: the buffer was
/// already written to a temp file, and the caller should move it into place
/// with elevated privileges.
#[derive(Debug, Clone, PartialEq)]
pub struct SudoSaveRequired {
    /// Temp file that already contains the complete buffer contents.
    pub temp_path: PathBuf,
    /// Final destination the temp file should be moved to.
    pub dest_path: PathBuf,
    /// Owner uid of the original destination file (0 when unknown or non-unix).
    pub uid: u32,
    /// Owner gid of the original destination file (0 when unknown or non-unix).
    pub gid: u32,
    /// Permission bits (`mode & 0o7777`) of the original file (0 when unknown).
    pub mode: u32,
}
36
37impl std::fmt::Display for SudoSaveRequired {
38 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39 write!(
40 f,
41 "Permission denied saving to {}. Use sudo to complete the operation.",
42 self.dest_path.display()
43 )
44 }
45}
46
47impl std::error::Error for SudoSaveRequired {}
48
/// Files at or above this size (100 MiB) are opened via the lazy
/// large-file path instead of being read fully into memory.
pub const DEFAULT_LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;

/// Maximum number of bytes faulted in from disk in one step when reading
/// unloaded buffer data (1 MiB) — see `get_text_range_mut`.
pub const LOAD_CHUNK_SIZE: usize = 1024 * 1024;

/// Chunk loads are aligned down to this boundary (64 KiB) so repeated reads
/// reuse the same chunk boundaries.
pub const CHUNK_ALIGNMENT: usize = 64 * 1024;
58
/// Line-ending convention of a buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LineEnding {
    /// Unix style `\n` (the default).
    #[default]
    LF,
    /// Windows style `\r\n`.
    CRLF,
    /// Classic Mac style `\r`.
    CR,
}
70
71impl LineEnding {
72 pub fn as_str(&self) -> &'static str {
74 match self {
75 Self::LF => "\n",
76 Self::CRLF => "\r\n",
77 Self::CR => "\r",
78 }
79 }
80
81 pub fn display_name(&self) -> &'static str {
83 match self {
84 Self::LF => "LF",
85 Self::CRLF => "CRLF",
86 Self::CR => "CR",
87 }
88 }
89}
90
/// A line number that is either exact or computed relative to a cached line
/// (relative values are rendered with a `~` prefix — see `format`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineNumber {
    /// Exact zero-based line number.
    Absolute(usize),
    /// Line number derived from a cached anchor line.
    Relative {
        // Zero-based line value.
        line: usize,
        // The cached line this value was derived from.
        from_cached_line: usize,
    },
}
103
104impl LineNumber {
105 pub fn value(&self) -> usize {
107 match self {
108 Self::Absolute(line) | Self::Relative { line, .. } => *line,
109 }
110 }
111
112 pub fn is_absolute(&self) -> bool {
114 matches!(self, LineNumber::Absolute(_))
115 }
116
117 pub fn is_relative(&self) -> bool {
119 matches!(self, LineNumber::Relative { .. })
120 }
121
122 pub fn format(&self) -> String {
124 match self {
125 Self::Absolute(line) => format!("{}", line + 1),
126 Self::Relative { line, .. } => format!("~{}", line + 1),
127 }
128 }
129}
130
/// A text document backed by a piece tree over immutable string buffers.
pub struct TextBuffer {
    // Piece tree describing the current document content.
    piece_tree: PieceTree,

    // Root of the piece tree as of the last save; `diff_since_saved` compares
    // against this, starting with a cheap `Arc` pointer check.
    saved_root: Arc<crate::model::piece_tree::PieceTreeNode>,

    // Backing buffers, indexed by buffer id (buffer 0 holds the original
    // content; edits append new buffers).
    buffers: Vec<StringBuffer>,

    // Next id to assign when a new buffer is created.
    next_buffer_id: usize,

    // File this buffer was loaded from / saves to, if any.
    file_path: Option<PathBuf>,

    // True when content differs from the last saved state.
    modified: bool,

    // True when there are edits not yet captured by crash recovery
    // (see `get_recovery_chunks` / `set_recovery_pending`).
    recovery_pending: bool,

    // True when the file was opened through the lazy large-file path.
    large_file: bool,

    // True when the content was detected as binary on load.
    is_binary: bool,

    // Line ending applied to the next save (and reported to the UI).
    line_ending: LineEnding,

    // Line ending of the file as it exists on disk; a save converts when this
    // differs from `line_ending`.
    original_line_ending: LineEnding,

    // On-disk size recorded at the last load/save, if known.
    saved_file_size: Option<usize>,

    // Monotonic edit counter, bumped (wrapping) on every content change.
    version: u64,
}
182
183impl TextBuffer {
    /// Creates an empty buffer.
    ///
    /// The threshold parameter is currently unused (it only matters when
    /// loading from a file) and is kept for signature compatibility.
    pub fn new(_large_file_threshold: usize) -> Self {
        let piece_tree = PieceTree::empty();
        let line_ending = LineEnding::default();
        TextBuffer {
            // Snapshot the empty tree so the buffer starts out "unmodified".
            saved_root: piece_tree.root(),
            piece_tree,
            // Buffer 0 is reserved for original (stored) content.
            buffers: vec![StringBuffer::new(0, Vec::new())],
            next_buffer_id: 1,
            file_path: None,
            modified: false,
            recovery_pending: false,
            large_file: false,
            is_binary: false,
            line_ending,
            original_line_ending: line_ending,
            saved_file_size: None,
            version: 0,
        }
    }
205
    /// Current edit version; changes on every content modification.
    pub fn version(&self) -> u64 {
        self.version
    }

    /// Advances the edit version (wrapping on overflow).
    #[inline]
    fn bump_version(&mut self) {
        self.version = self.version.wrapping_add(1);
    }

    /// Marks the buffer dirty for both save tracking and crash recovery, and
    /// bumps the version. Called by every mutating operation.
    #[inline]
    fn mark_content_modified(&mut self) {
        self.modified = true;
        self.recovery_pending = true;
        self.bump_version();
    }
222
223 pub fn from_bytes(content: Vec<u8>) -> Self {
225 let bytes = content.len();
226
227 let line_ending = Self::detect_line_ending(&content);
229
230 let buffer = StringBuffer::new(0, content);
232 let line_feed_cnt = buffer.line_feed_count();
233
234 let piece_tree = if bytes > 0 {
235 PieceTree::new(BufferLocation::Stored(0), 0, bytes, line_feed_cnt)
236 } else {
237 PieceTree::empty()
238 };
239
240 let saved_root = piece_tree.root();
241
242 TextBuffer {
243 line_ending,
244 original_line_ending: line_ending,
245 piece_tree,
246 saved_root,
247 buffers: vec![buffer],
248 next_buffer_id: 1,
249 file_path: None,
250 modified: false,
251 recovery_pending: false,
252 large_file: false,
253 is_binary: false,
254 saved_file_size: Some(bytes), version: 0,
256 }
257 }
258
    /// Builds a buffer from a string slice; the threshold parameter is unused
    /// (kept for signature compatibility with the file-loading path).
    pub fn from_str(s: &str, _large_file_threshold: usize) -> Self {
        Self::from_bytes(s.as_bytes().to_vec())
    }
263
264 pub fn empty() -> Self {
266 let piece_tree = PieceTree::empty();
267 let saved_root = piece_tree.root();
268 let line_ending = LineEnding::default();
269 TextBuffer {
270 piece_tree,
271 saved_root,
272 buffers: vec![StringBuffer::new(0, Vec::new())],
273 next_buffer_id: 1,
274 file_path: None,
275 modified: false,
276 recovery_pending: false,
277 large_file: false,
278 is_binary: false,
279 line_ending,
280 original_line_ending: line_ending,
281 saved_file_size: None,
282 version: 0,
283 }
284 }
285
    /// Opens `path`, choosing eager loading for small files and lazy
    /// (load-on-demand) loading for files at or above the threshold.
    ///
    /// A `large_file_threshold` of 0 means "use the default threshold".
    pub fn load_from_file<P: AsRef<Path>>(
        path: P,
        large_file_threshold: usize,
    ) -> anyhow::Result<Self> {
        let path = path.as_ref();

        let metadata = std::fs::metadata(path)?;
        let file_size = metadata.len() as usize;

        let threshold = if large_file_threshold > 0 {
            large_file_threshold
        } else {
            DEFAULT_LARGE_FILE_THRESHOLD
        };

        if file_size >= threshold {
            Self::load_large_file(path, file_size)
        } else {
            Self::load_small_file(path)
        }
    }
311
    /// Reads the whole file into memory and wraps it in a buffer.
    fn load_small_file<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
        let path = path.as_ref();
        let mut file = std::fs::File::open(path)?;
        let mut contents = Vec::new();
        file.read_to_end(&mut contents)?;

        // Detect binary-ness and line endings before `contents` is moved.
        let is_binary = Self::detect_binary(&contents);

        let line_ending = Self::detect_line_ending(&contents);

        let mut buffer = Self::from_bytes(contents);
        buffer.file_path = Some(path.to_path_buf());
        buffer.modified = false;
        buffer.large_file = false;
        buffer.is_binary = is_binary;
        buffer.line_ending = line_ending;
        buffer.original_line_ending = line_ending;
        Ok(buffer)
    }
335
    /// Opens a file lazily: the content stays on disk (an `Unloaded` buffer)
    /// and is faulted in chunk-by-chunk on first read.
    fn load_large_file<P: AsRef<Path>>(path: P, file_size: usize) -> anyhow::Result<Self> {
        use crate::model::piece_tree::{BufferData, BufferLocation};

        let path = path.as_ref();

        // Sniff only the first 8 KiB for binary detection and line endings.
        let (is_binary, line_ending) = {
            let mut file = std::fs::File::open(path)?;
            let sample_size = file_size.min(8 * 1024);
            let mut sample = vec![0u8; sample_size];
            file.read_exact(&mut sample)?;
            let is_binary = Self::detect_binary(&sample);
            let line_ending = Self::detect_line_ending(&sample);
            (is_binary, line_ending)
        };

        // Buffer 0 references the file on disk without reading it.
        let buffer = StringBuffer {
            id: 0,
            data: BufferData::Unloaded {
                file_path: path.to_path_buf(),
                file_offset: 0,
                bytes: file_size,
            },
        };

        // Line-feed count is unknown (`None`) until data is actually loaded,
        // which is why `line_count()` can return `None` for large files.
        let piece_tree = if file_size > 0 {
            PieceTree::new(BufferLocation::Stored(0), 0, file_size, None)
        } else {
            PieceTree::empty()
        };
        let saved_root = piece_tree.root();

        tracing::debug!(
            "Buffer::load_from_file: loaded {} bytes, saved_file_size={}",
            file_size,
            file_size
        );

        Ok(TextBuffer {
            piece_tree,
            saved_root,
            buffers: vec![buffer],
            next_buffer_id: 1,
            file_path: Some(path.to_path_buf()),
            modified: false,
            recovery_pending: false,
            large_file: true,
            is_binary,
            line_ending,
            original_line_ending: line_ending,
            saved_file_size: Some(file_size),
            version: 0,
        })
    }
395
396 pub fn save(&mut self) -> anyhow::Result<()> {
398 if let Some(path) = &self.file_path {
399 self.save_to_file(path.clone())
400 } else {
401 anyhow::bail!(io::Error::new(
402 io::ErrorKind::NotFound,
403 "No file path associated with buffer",
404 ))
405 }
406 }
407
    /// Decides whether a save should truncate-and-rewrite the destination file
    /// directly instead of the temp-file + rename approach.
    ///
    /// Returns true when the file exists and is owned by a different user —
    /// presumably so an in-place write preserves the other user's ownership
    /// (a rename would re-create the file as the current user); TODO confirm.
    /// Returns false when the file is ours or doesn't exist yet.
    #[cfg(unix)]
    fn should_use_inplace_write(dest_path: &Path) -> bool {
        if let Ok(meta) = std::fs::metadata(dest_path) {
            let file_uid = meta.uid();
            // SAFETY: getuid() takes no arguments and cannot fail.
            let current_uid = unsafe { libc::getuid() };
            file_uid != current_uid
        } else {
            false
        }
    }
432
    /// Non-unix platforms have no uid concept; always use the temp-file path.
    #[cfg(not(unix))]
    fn should_use_inplace_write(_dest_path: &Path) -> bool {
        false
    }
438
    /// Creates a temp file for a safe save, preferring the destination's own
    /// directory (so the final rename stays on one filesystem), and falling
    /// back to the system temp dir when that directory is not writable.
    fn create_temp_file(dest_path: &Path) -> io::Result<(PathBuf, std::fs::File)> {
        // NOTE(review): `with_extension` replaces any existing extension, so
        // sibling files "foo.txt" and "foo.rs" would both map to "foo.tmp" —
        // confirm concurrent saves of such siblings cannot collide.
        let same_dir_temp = dest_path.with_extension("tmp");
        match std::fs::File::create(&same_dir_temp) {
            Ok(file) => Ok((same_dir_temp, file)),
            Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                // Directory not writable: build a unique name in the system
                // temp dir from file name + pid + nanosecond timestamp.
                let temp_dir = std::env::temp_dir();
                let file_name = dest_path
                    .file_name()
                    .unwrap_or_else(|| std::ffi::OsStr::new("fresh-save"));
                let timestamp = std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .map(|d| d.as_nanos())
                    .unwrap_or(0);
                let temp_path = temp_dir.join(format!(
                    "{}-{}-{}.tmp",
                    file_name.to_string_lossy(),
                    std::process::id(),
                    timestamp
                ));
                let file = std::fs::File::create(&temp_path)?;
                Ok((temp_path, file))
            }
            Err(e) => Err(e),
        }
    }
466
    /// Writes the buffer's contents to `path`, then records the result as the
    /// saved state (snapshot, size, path, line ending).
    ///
    /// Strategy: normally write to a temp file and rename it over the
    /// destination; when `should_use_inplace_write` says the file belongs to
    /// another user, try truncating the destination in place first. On a
    /// `PermissionDenied` failure the temp file is kept and a
    /// `SudoSaveRequired` error is returned so the caller can escalate.
    pub fn save_to_file<P: AsRef<Path>>(&mut self, path: P) -> anyhow::Result<()> {
        let dest_path = path.as_ref();
        let total = self.total_bytes();

        // Capture ownership/permissions up front so they can be restored on
        // the temp file before the rename.
        let original_metadata = std::fs::metadata(dest_path).ok();

        // Line endings are only rewritten when the user changed them.
        let needs_conversion = self.line_ending != self.original_line_ending;
        let target_ending = self.line_ending;

        let use_inplace = Self::should_use_inplace_write(dest_path);

        // `temp_path` is `Some` when writing via a temp file (rename needed
        // later), `None` when writing directly into the destination.
        let (temp_path, mut out_file) = if use_inplace {
            match std::fs::OpenOptions::new()
                .write(true)
                .truncate(true)
                .open(dest_path)
            {
                Ok(file) => (None, file),
                Err(e) if e.kind() == io::ErrorKind::PermissionDenied => {
                    // Fall back to the temp-file path; the final rename may
                    // then surface a SudoSaveRequired error.
                    let (path, file) = Self::create_temp_file(dest_path)?;
                    (Some(path), file)
                }
                Err(e) => return Err(e.into()),
            }
        } else {
            let (path, file) = Self::create_temp_file(dest_path)?;
            (Some(path), file)
        };

        if total > 0 {
            // Cache the source file handle so consecutive pieces from the same
            // unloaded buffer don't reopen the file.
            let mut source_file_cache: Option<(PathBuf, std::fs::File)> = None;

            for piece_view in self.piece_tree.iter_pieces_in_range(0, total) {
                let buffer_id = piece_view.location.buffer_id();
                let buffer = self.buffers.get(buffer_id).ok_or_else(|| {
                    io::Error::new(
                        io::ErrorKind::InvalidData,
                        format!("Buffer {} not found", buffer_id),
                    )
                })?;

                match &buffer.data {
                    // In-memory data: write it (optionally converted) directly.
                    BufferData::Loaded { data, .. } => {
                        let start = piece_view.buffer_offset;
                        let end = start + piece_view.bytes;
                        let chunk = &data[start..end];

                        if needs_conversion {
                            let converted = Self::convert_line_endings_to(chunk, target_ending);
                            out_file.write_all(&converted)?;
                        } else {
                            out_file.write_all(chunk)?;
                        }
                    }
                    // Data still on disk: stream it through a fixed-size chunk
                    // buffer instead of loading the whole piece.
                    BufferData::Unloaded {
                        file_path,
                        file_offset,
                        ..
                    } => {
                        let source_file = match &mut source_file_cache {
                            Some((cached_path, file)) if cached_path == file_path => file,
                            _ => {
                                let file = std::fs::File::open(file_path)?;
                                source_file_cache = Some((file_path.clone(), file));
                                &mut source_file_cache.as_mut().unwrap().1
                            }
                        };

                        let read_offset = *file_offset + piece_view.buffer_offset;
                        source_file.seek(SeekFrom::Start(read_offset as u64))?;

                        const STREAM_CHUNK_SIZE: usize = 64 * 1024;
                        let mut remaining = piece_view.bytes;
                        let mut chunk_buf = vec![0u8; STREAM_CHUNK_SIZE.min(remaining)];

                        while remaining > 0 {
                            let to_read = remaining.min(chunk_buf.len());
                            source_file.read_exact(&mut chunk_buf[..to_read])?;

                            // NOTE(review): converting per chunk assumes a CRLF
                            // pair never straddles a chunk boundary — confirm
                            // `convert_line_endings_to` tolerates that case.
                            if needs_conversion {
                                let converted = Self::convert_line_endings_to(
                                    &chunk_buf[..to_read],
                                    target_ending,
                                );
                                out_file.write_all(&converted)?;
                            } else {
                                out_file.write_all(&chunk_buf[..to_read])?;
                            }
                            remaining -= to_read;
                        }
                    }
                }
            }
        }

        // Flush to disk before any rename so a crash can't leave a truncated file.
        out_file.sync_all()?;
        drop(out_file);

        if let Some(temp_path) = temp_path {
            // Best effort: copy the original permissions/ownership onto the
            // temp file so the rename preserves them.
            if let Some(ref meta) = original_metadata {
                let _ = Self::restore_file_metadata(&temp_path, meta);
            }

            if let Err(e) = std::fs::rename(&temp_path, dest_path) {
                let is_permission_denied = e.kind() == io::ErrorKind::PermissionDenied;
                // EXDEV (18): temp and destination are on different filesystems.
                let is_cross_device = cfg!(unix) && e.raw_os_error() == Some(18);

                if is_cross_device {
                    #[cfg(unix)]
                    {
                        // rename(2) can't cross filesystems; copy + delete instead.
                        match std::fs::copy(&temp_path, dest_path) {
                            Ok(_) => {
                                let _ = std::fs::remove_file(&temp_path);
                            }
                            Err(copy_err) if copy_err.kind() == io::ErrorKind::PermissionDenied => {
                                return Err(self.make_sudo_error(
                                    temp_path,
                                    dest_path,
                                    original_metadata,
                                ));
                            }
                            Err(copy_err) => return Err(copy_err.into()),
                        }
                    }
                } else if is_permission_denied {
                    return Err(self.make_sudo_error(temp_path, dest_path, original_metadata));
                } else {
                    return Err(e.into());
                }
            }
        }
        // Record the on-disk size of what was just written.
        let new_size = std::fs::metadata(dest_path)?.len() as usize;
        tracing::debug!(
            "Buffer::save: updating saved_file_size from {:?} to {}",
            self.saved_file_size,
            new_size
        );
        self.saved_file_size = Some(new_size);

        self.file_path = Some(dest_path.to_path_buf());
        self.mark_saved_snapshot();

        // The file on disk now uses the current line ending.
        self.original_line_ending = self.line_ending;

        Ok(())
    }
653
    /// Records a save performed outside this method (presumably after a
    /// `SudoSaveRequired` escalation — confirm with callers): refreshes the
    /// saved size, path, snapshot, and line-ending state without writing.
    pub fn finalize_external_save(&mut self, dest_path: PathBuf) -> anyhow::Result<()> {
        let new_size = std::fs::metadata(&dest_path)?.len() as usize;
        self.saved_file_size = Some(new_size);
        self.file_path = Some(dest_path);
        self.mark_saved_snapshot();
        self.original_line_ending = self.line_ending;
        Ok(())
    }
665
    /// Builds the `SudoSaveRequired` error carrying the temp file location and
    /// the destination's original ownership/permission bits (zeros when the
    /// metadata is unavailable or the platform is not unix).
    fn make_sudo_error(
        &self,
        temp_path: PathBuf,
        dest_path: &Path,
        original_metadata: Option<std::fs::Metadata>,
    ) -> anyhow::Error {
        let (uid, gid, mode) = if let Some(meta) = original_metadata {
            #[cfg(unix)]
            {
                // Keep only permission bits (incl. setuid/setgid/sticky).
                (meta.uid(), meta.gid(), meta.mode() & 0o7777)
            }
            #[cfg(not(unix))]
            (0, 0, 0)
        } else {
            (0, 0, 0)
        };

        anyhow::anyhow!(SudoSaveRequired {
            temp_path,
            dest_path: dest_path.to_path_buf(),
            uid,
            gid,
            mode,
        })
    }
692
    /// Copies permissions (and, on unix, ownership) from `original_meta` onto
    /// `path`. Ownership restoration is best-effort: the `chown` result is
    /// deliberately ignored, since it typically requires elevated rights.
    fn restore_file_metadata(path: &Path, original_meta: &std::fs::Metadata) -> anyhow::Result<()> {
        std::fs::set_permissions(path, original_meta.permissions())?;

        #[cfg(unix)]
        {
            let uid = original_meta.uid();
            let gid = original_meta.gid();
            // SAFETY: chown is called with a valid NUL-terminated path; the
            // return value is intentionally unchecked (best effort).
            unsafe {
                use std::os::unix::ffi::OsStrExt;
                let c_path = std::ffi::CString::new(path.as_os_str().as_bytes())
                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
                libc::chown(c_path.as_ptr(), uid, gid);
            }
        }

        Ok(())
    }
715
    /// Total document length in bytes.
    pub fn total_bytes(&self) -> usize {
        self.piece_tree.total_bytes()
    }

    /// Number of lines, or `None` when it isn't known yet (lazily loaded
    /// large files have unknown line-feed counts — see `load_large_file`).
    pub fn line_count(&self) -> Option<usize> {
        self.piece_tree.line_count()
    }

    /// Records the current tree as the saved state and clears the dirty flag;
    /// `diff_since_saved` compares against this snapshot.
    pub fn mark_saved_snapshot(&mut self) {
        self.saved_root = self.piece_tree.root();
        self.modified = false;
    }
733
    /// Computes which byte/line ranges changed since the last saved snapshot.
    ///
    /// Fast paths, in order:
    /// 1. identical root `Arc` -> definitely unchanged;
    /// 2. structural tree diff reports equal -> unchanged;
    /// 3. for small reported diffs (<= 64 KiB), verify the bytes actually
    ///    differ — structural changes (e.g. piece splits) can report ranges
    ///    whose content is identical.
    pub fn diff_since_saved(&self) -> PieceTreeDiff {
        if Arc::ptr_eq(&self.saved_root, &self.piece_tree.root()) {
            return PieceTreeDiff {
                equal: true,
                byte_ranges: Vec::new(),
                line_ranges: Some(Vec::new()),
            };
        }

        let structure_diff = self.diff_trees_by_structure();

        if structure_diff.equal {
            return structure_diff;
        }

        let total_changed_bytes: usize = structure_diff
            .byte_ranges
            .iter()
            .map(|r| r.end.saturating_sub(r.start))
            .sum();

        // Only pay for byte-level verification on small diffs.
        const MAX_VERIFY_BYTES: usize = 64 * 1024;
        if total_changed_bytes <= MAX_VERIFY_BYTES && !structure_diff.byte_ranges.is_empty() {
            if self.verify_content_differs_in_ranges(&structure_diff.byte_ranges) {
                return structure_diff;
            } else {
                // False positive: the structure changed but the bytes did not.
                return PieceTreeDiff {
                    equal: true,
                    byte_ranges: Vec::new(),
                    line_ranges: Some(Vec::new()),
                };
            }
        }

        structure_diff
    }
796
797 fn verify_content_differs_in_ranges(&self, byte_ranges: &[std::ops::Range<usize>]) -> bool {
800 let saved_bytes = self.tree_total_bytes(&self.saved_root);
801 let current_bytes = self.piece_tree.total_bytes();
802
803 if saved_bytes != current_bytes {
805 return true;
806 }
807
808 for range in byte_ranges {
810 if range.start >= range.end {
811 continue;
812 }
813
814 let saved_slice =
816 self.extract_range_from_tree(&self.saved_root, range.start, range.end);
817 let current_slice = self.get_text_range(range.start, range.end);
819
820 match (saved_slice, current_slice) {
821 (Some(saved), Some(current)) => {
822 if saved != current {
823 return true; }
825 }
826 _ => {
827 return true;
829 }
830 }
831 }
832
833 false
835 }
836
    /// Collects the bytes in `[start, end)` from an arbitrary tree root
    /// (used to read the *saved* tree, which `get_text_range` cannot see).
    /// Returns `None` when any required buffer data is unavailable.
    fn extract_range_from_tree(
        &self,
        root: &Arc<crate::model::piece_tree::PieceTreeNode>,
        start: usize,
        end: usize,
    ) -> Option<Vec<u8>> {
        let mut result = Vec::with_capacity(end.saturating_sub(start));
        self.collect_range_from_node(root, start, end, 0, &mut result)?;
        Some(result)
    }
848
    /// Recursive worker for `extract_range_from_tree`: walks the subtree and
    /// appends the bytes of `[range_start, range_end)` to `result`.
    ///
    /// `node_offset` is the document offset where this subtree begins.
    /// Returns `None` if a referenced buffer or its data is missing.
    fn collect_range_from_node(
        &self,
        node: &Arc<crate::model::piece_tree::PieceTreeNode>,
        range_start: usize,
        range_end: usize,
        node_offset: usize,
        result: &mut Vec<u8>,
    ) -> Option<()> {
        use crate::model::piece_tree::PieceTreeNode;

        match node.as_ref() {
            PieceTreeNode::Internal {
                left_bytes,
                left,
                right,
                ..
            } => {
                // Document offset where the right subtree starts.
                let left_end = node_offset + left_bytes;

                if range_start < left_end {
                    self.collect_range_from_node(
                        left,
                        range_start,
                        range_end,
                        node_offset,
                        result,
                    )?;
                }

                if range_end > left_end {
                    self.collect_range_from_node(right, range_start, range_end, left_end, result)?;
                }
            }
            PieceTreeNode::Leaf {
                location,
                offset,
                bytes,
                ..
            } => {
                let node_end = node_offset + bytes;

                // Only touch leaves that overlap the requested range.
                if range_start < node_end && range_end > node_offset {
                    let buf = self.buffers.get(location.buffer_id())?;
                    let data = buf.get_data()?;

                    // Clamp the range to this leaf, in piece-local coordinates.
                    let leaf_start = range_start.saturating_sub(node_offset);
                    let leaf_end = (range_end - node_offset).min(*bytes);

                    if leaf_start < leaf_end {
                        let slice = data.get(*offset + leaf_start..*offset + leaf_end)?;
                        result.extend_from_slice(slice);
                    }
                }
            }
        }
        Some(())
    }
911
    /// Total byte length of an arbitrary tree. Internal nodes cache only their
    /// left subtree's size, so walk the right spine and sum.
    fn tree_total_bytes(&self, root: &Arc<crate::model::piece_tree::PieceTreeNode>) -> usize {
        use crate::model::piece_tree::PieceTreeNode;
        match root.as_ref() {
            PieceTreeNode::Internal {
                left_bytes, right, ..
            } => left_bytes + self.tree_total_bytes(right),
            PieceTreeNode::Leaf { bytes, .. } => *bytes,
        }
    }
922
    /// Structural diff of the saved tree vs. the current tree.
    ///
    /// The closure lets the diff algorithm count line feeds in any leaf
    /// sub-range (so it can report line ranges too); it returns `None` when
    /// the leaf's buffer data is unavailable.
    fn diff_trees_by_structure(&self) -> PieceTreeDiff {
        crate::model::piece_tree_diff::diff_piece_trees(
            &self.saved_root,
            &self.piece_tree.root(),
            &|leaf, start, len| {
                if len == 0 {
                    return Some(0);
                }
                let buf = self.buffers.get(leaf.location.buffer_id())?;
                let data = buf.get_data()?;
                let start = leaf.offset + start;
                let end = start + len;
                let slice = data.get(start..end)?;
                let line_feeds = slice.iter().filter(|&&b| b == b'\n').count();
                Some(line_feeds)
            },
        )
    }
942
    /// Converts a byte offset into a (line, column) position, or `None` when
    /// the conversion fails (e.g. required data unavailable).
    pub fn offset_to_position(&self, offset: usize) -> Option<Position> {
        self.piece_tree
            .offset_to_position(offset, &self.buffers)
            .map(|(line, column)| Position { line, column })
    }

    /// Converts a (line, column) position into a byte offset.
    pub fn position_to_offset(&self, position: Position) -> usize {
        self.piece_tree
            .position_to_offset(position.line, position.column, &self.buffers)
    }
955
956 pub fn insert_bytes(&mut self, offset: usize, text: Vec<u8>) -> Cursor {
958 if text.is_empty() {
959 return self.piece_tree.cursor_at_offset(offset);
960 }
961
962 self.mark_content_modified();
964
965 let line_feed_cnt = Some(text.iter().filter(|&&b| b == b'\n').count());
967
968 let (buffer_location, buffer_offset, text_len) =
970 if let Some(append_info) = self.try_append_to_existing_buffer(offset, &text) {
971 append_info
972 } else {
973 let buffer_id = self.next_buffer_id;
975 self.next_buffer_id += 1;
976 let buffer = StringBuffer::new(buffer_id, text.clone());
977 self.buffers.push(buffer);
978 (BufferLocation::Added(buffer_id), 0, text.len())
979 };
980
981 self.piece_tree.insert(
983 offset,
984 buffer_location,
985 buffer_offset,
986 text_len,
987 line_feed_cnt,
988 &self.buffers,
989 )
990 }
991
992 fn try_append_to_existing_buffer(
995 &mut self,
996 offset: usize,
997 text: &[u8],
998 ) -> Option<(BufferLocation, usize, usize)> {
999 if text.is_empty() || offset == 0 {
1001 return None;
1002 }
1003
1004 let piece_info = self.piece_tree.find_by_offset(offset - 1)?;
1007
1008 let offset_in_piece = piece_info.offset_in_piece?;
1012 if offset_in_piece + 1 != piece_info.bytes {
1013 return None; }
1015
1016 if !matches!(piece_info.location, BufferLocation::Added(_)) {
1018 return None;
1019 }
1020
1021 let buffer_id = piece_info.location.buffer_id();
1022 let buffer = self.buffers.get_mut(buffer_id)?;
1023
1024 let buffer_len = buffer.get_data()?.len();
1026
1027 if piece_info.offset + piece_info.bytes != buffer_len {
1029 return None;
1030 }
1031
1032 let append_offset = buffer.append(text);
1034
1035 Some((piece_info.location, append_offset, text.len()))
1036 }
1037
    /// Convenience wrapper: inserts UTF-8 text at `offset`, discarding the
    /// cursor returned by `insert_bytes`.
    pub fn insert(&mut self, offset: usize, text: &str) {
        self.insert_bytes(offset, text.as_bytes().to_vec());
    }
1042
1043 pub fn insert_at_position(&mut self, position: Position, text: Vec<u8>) -> Cursor {
1046 if text.is_empty() {
1047 let offset = self.position_to_offset(position);
1048 return self.piece_tree.cursor_at_offset(offset);
1049 }
1050
1051 self.mark_content_modified();
1052
1053 let line_feed_cnt = text.iter().filter(|&&b| b == b'\n').count();
1055
1056 let buffer_id = self.next_buffer_id;
1058 self.next_buffer_id += 1;
1059 let buffer = StringBuffer::new(buffer_id, text.clone());
1060 self.buffers.push(buffer);
1061
1062 self.piece_tree.insert_at_position(
1064 position.line,
1065 position.column,
1066 BufferLocation::Added(buffer_id),
1067 0,
1068 text.len(),
1069 line_feed_cnt,
1070 &self.buffers,
1071 )
1072 }
1073
    /// Deletes `bytes` bytes starting at `offset`; empty or out-of-range
    /// requests are ignored.
    pub fn delete_bytes(&mut self, offset: usize, bytes: usize) {
        if bytes == 0 || offset >= self.total_bytes() {
            return;
        }

        self.piece_tree.delete(offset, bytes, &self.buffers);

        self.mark_content_modified();
    }

    /// Deletes a half-open byte range; empty or inverted ranges are no-ops.
    pub fn delete(&mut self, range: Range<usize>) {
        if range.end > range.start {
            self.delete_bytes(range.start, range.end - range.start);
        }
    }

    /// Deletes the text between two (line, column) positions.
    ///
    /// NOTE(review): this marks the buffer modified even when the range is
    /// empty, unlike the byte-based variants — confirm that is intended.
    pub fn delete_range(&mut self, start: Position, end: Position) {
        self.piece_tree.delete_position_range(
            start.line,
            start.column,
            end.line,
            end.column,
            &self.buffers,
        );
        self.mark_content_modified();
    }
1106
    /// Replaces the entire document with `new_content`.
    ///
    /// Existing buffers are kept (older trees/snapshots may still reference
    /// them); the new content goes into a fresh add-buffer under a new tree.
    pub fn replace_content(&mut self, new_content: &str) {
        let bytes = new_content.len();
        let content_bytes = new_content.as_bytes().to_vec();

        let line_feed_cnt = content_bytes.iter().filter(|&&b| b == b'\n').count();

        let buffer_id = self.next_buffer_id;
        self.next_buffer_id += 1;
        let buffer = StringBuffer::new(buffer_id, content_bytes);
        self.buffers.push(buffer);

        // Empty replacement becomes an empty tree, not a zero-length piece.
        if bytes > 0 {
            self.piece_tree = PieceTree::new(
                BufferLocation::Added(buffer_id),
                0,
                bytes,
                Some(line_feed_cnt),
            );
        } else {
            self.piece_tree = PieceTree::empty();
        }

        self.mark_content_modified();
    }
1140
    /// Restores a previously captured tree snapshot (presumably for undo/redo
    /// — TODO confirm; buffers are only ever appended to here, so old trees
    /// remain valid).
    pub fn restore_piece_tree(&mut self, tree: &Arc<PieceTree>) {
        self.piece_tree = (**tree).clone();
        self.mark_content_modified();
    }

    /// Captures the current tree as a snapshot for later restoration.
    pub fn snapshot_piece_tree(&self) -> Arc<PieceTree> {
        Arc::new(self.piece_tree.clone())
    }
1153
    /// Applies many `(start, end, replacement)` edits in a single tree pass,
    /// returning the resulting delta (net byte-length change).
    ///
    /// Non-empty replacement texts are materialized as add-buffers first, in
    /// edit order; the closure then hands them to the tree one at a time.
    pub fn apply_bulk_edits(&mut self, edits: &[(usize, usize, &str)]) -> isize {
        let mut buffer_info: Vec<(BufferLocation, usize, usize, Option<usize>)> = Vec::new();

        for (_, _, text) in edits {
            if !text.is_empty() {
                let buffer_id = self.next_buffer_id;
                self.next_buffer_id += 1;
                let content = text.as_bytes().to_vec();
                let lf_cnt = content.iter().filter(|&&b| b == b'\n').count();
                let bytes = content.len();
                let buffer = StringBuffer::new(buffer_id, content);
                self.buffers.push(buffer);
                buffer_info.push((BufferLocation::Added(buffer_id), 0, bytes, Some(lf_cnt)));
            }
        }

        // The callback must be invoked once per non-empty insertion, in the
        // same order the buffers were created above.
        let mut idx = 0;
        let delta = self
            .piece_tree
            .apply_bulk_edits(edits, &self.buffers, |_text| {
                let info = buffer_info[idx];
                idx += 1;
                info
            });

        self.mark_content_modified();
        delta
    }
1190
    /// Reads `bytes` bytes starting at `offset` without touching the disk.
    ///
    /// Returns `None` when any piece in the range has no in-memory data
    /// (e.g. an unloaded large-file buffer) — use `get_text_range_mut` to
    /// load data on demand instead.
    fn get_text_range(&self, offset: usize, bytes: usize) -> Option<Vec<u8>> {
        if bytes == 0 {
            return Some(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = offset + bytes;
        let mut collected = 0;

        for piece_view in self.piece_tree.iter_pieces_in_range(offset, end_offset) {
            let buffer_id = piece_view.location.buffer_id();
            if let Some(buffer) = self.buffers.get(buffer_id) {
                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clamp the requested range to this piece.
                let read_start = offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    // `?` aborts the whole read if this buffer has no data.
                    let data = buffer.get_data()?;

                    if buffer_end <= data.len() {
                        result.extend_from_slice(&data[buffer_start..buffer_end]);
                        collected += bytes_to_read;

                        if collected >= bytes {
                            break;
                        }
                    }
                }
            }
        }

        Some(result)
    }
1241
    /// Reads `[offset, offset + bytes)` (clamped to the document length),
    /// loading any unloaded buffer data on demand.
    ///
    /// Large unloaded pieces are not loaded wholesale: a `LOAD_CHUNK_SIZE`-
    /// bounded, `CHUNK_ALIGNMENT`-aligned chunk is split out of the piece into
    /// its own buffer and loaded; piece iteration then restarts because the
    /// tree was restructured.
    ///
    /// # Errors
    /// Fails when a buffer is missing, a load fails, or no forward progress
    /// can be made (guards against an infinite loop).
    pub fn get_text_range_mut(&mut self, offset: usize, bytes: usize) -> Result<Vec<u8>> {
        if bytes == 0 {
            return Ok(Vec::new());
        }

        let mut result = Vec::with_capacity(bytes);
        let end_offset = (offset + bytes).min(self.len());
        let mut current_offset = offset;

        while current_offset < end_offset {
            let mut made_progress = false;
            let mut restarted_iteration = false;

            for piece_view in self
                .piece_tree
                .iter_pieces_in_range(current_offset, end_offset)
            {
                let buffer_id = piece_view.location.buffer_id();

                let needs_loading = self
                    .buffers
                    .get(buffer_id)
                    .map(|b| !b.is_loaded())
                    .unwrap_or(false);

                if needs_loading {
                    if piece_view.bytes > LOAD_CHUNK_SIZE {
                        // Piece too large to load at once: carve out an aligned
                        // chunk around the current read position.
                        let piece_start_in_doc = piece_view.doc_offset;
                        let offset_in_piece = current_offset.saturating_sub(piece_start_in_doc);

                        // Align the chunk start downward so repeated reads land
                        // on the same chunk boundaries.
                        let chunk_start_in_buffer = (piece_view.buffer_offset + offset_in_piece)
                            / CHUNK_ALIGNMENT
                            * CHUNK_ALIGNMENT;
                        let chunk_bytes = LOAD_CHUNK_SIZE.min(
                            (piece_view.buffer_offset + piece_view.bytes)
                                .saturating_sub(chunk_start_in_buffer),
                        );

                        let chunk_start_offset_in_piece =
                            chunk_start_in_buffer.saturating_sub(piece_view.buffer_offset);
                        let split_start_in_doc = piece_start_in_doc + chunk_start_offset_in_piece;
                        let split_end_in_doc = split_start_in_doc + chunk_bytes;

                        // Split the piece so the chunk becomes its own leaf.
                        if chunk_start_offset_in_piece > 0 {
                            self.piece_tree
                                .split_at_offset(split_start_in_doc, &self.buffers);
                        }
                        if split_end_in_doc < piece_start_in_doc + piece_view.bytes {
                            self.piece_tree
                                .split_at_offset(split_end_in_doc, &self.buffers);
                        }

                        // New buffer describing just the chunk's file range.
                        let chunk_buffer = self
                            .buffers
                            .get(buffer_id)
                            .context("Buffer not found")?
                            .create_chunk_buffer(
                                self.next_buffer_id,
                                chunk_start_in_buffer,
                                chunk_bytes,
                            )
                            .context("Failed to create chunk buffer")?;

                        self.next_buffer_id += 1;
                        let new_buffer_id = chunk_buffer.id;
                        self.buffers.push(chunk_buffer);

                        // Point the split-out leaf at the new chunk buffer.
                        self.piece_tree.replace_buffer_reference(
                            buffer_id,
                            piece_view.buffer_offset + chunk_start_offset_in_piece,
                            chunk_bytes,
                            BufferLocation::Added(new_buffer_id),
                        );

                        self.buffers
                            .get_mut(new_buffer_id)
                            .context("Chunk buffer not found")?
                            .load()
                            .context("Failed to load chunk")?;

                        // The tree changed shape; restart piece iteration.
                        restarted_iteration = true;
                        break;
                    } else {
                        // Small piece: load its whole buffer in place.
                        self.buffers
                            .get_mut(buffer_id)
                            .context("Buffer not found")?
                            .load()
                            .context("Failed to load buffer")?;
                    }
                }

                let piece_start_in_doc = piece_view.doc_offset;
                let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                // Clamp the requested range to this piece.
                let read_start = current_offset.max(piece_start_in_doc);
                let read_end = end_offset.min(piece_end_in_doc);

                if read_end > read_start {
                    let offset_in_piece = read_start - piece_start_in_doc;
                    let bytes_to_read = read_end - read_start;

                    let buffer_start = piece_view.buffer_offset + offset_in_piece;
                    let buffer_end = buffer_start + bytes_to_read;

                    let buffer = self.buffers.get(buffer_id).context("Buffer not found")?;
                    let data = buffer
                        .get_data()
                        .context("Buffer data unavailable after load")?;

                    anyhow::ensure!(
                        buffer_end <= data.len(),
                        "Buffer range out of bounds: requested {}..{}, buffer size {}",
                        buffer_start,
                        buffer_end,
                        data.len()
                    );

                    result.extend_from_slice(&data[buffer_start..buffer_end]);
                    current_offset = read_end;
                    made_progress = true;
                }
            }

            // Neither read anything nor restructured the tree: bail out
            // instead of spinning forever.
            if !made_progress && !restarted_iteration {
                tracing::error!(
                    "get_text_range_mut: No progress at offset {} (requested range: {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
                tracing::error!(
                    "Piece tree stats: {} total bytes",
                    self.piece_tree.stats().total_bytes
                );
                anyhow::bail!(
                    "Failed to read data at offset {}: no progress made (requested {}..{}, buffer len: {})",
                    current_offset,
                    offset,
                    end_offset,
                    self.len()
                );
            }
        }

        Ok(result)
    }
1414
    /// Ensures the data needed to render `line_count` lines starting at
    /// `start_offset` is loaded, using a rough 200-bytes-per-line estimate.
    /// The bytes read are discarded; only the loading side effect matters.
    pub fn prepare_viewport(&mut self, start_offset: usize, line_count: usize) -> Result<()> {
        let estimated_bytes = line_count.saturating_mul(200);

        // Never ask for more than remains in the document.
        let remaining_bytes = self.total_bytes().saturating_sub(start_offset);
        let bytes_to_load = estimated_bytes.min(remaining_bytes);

        self.get_text_range_mut(start_offset, bytes_to_load)?;

        Ok(())
    }
1442
    /// Entire document as bytes; `None` if any buffer data is unavailable.
    pub(crate) fn get_all_text(&self) -> Option<Vec<u8>> {
        self.get_text_range(0, self.total_bytes())
    }

    /// Entire document as a string (invalid UTF-8 replaced lossily).
    pub(crate) fn get_all_text_string(&self) -> Option<String> {
        self.get_all_text()
            .map(|bytes| String::from_utf8_lossy(&bytes).into_owned())
    }

    /// Bytes in `range`; empty when data is unavailable or the range is
    /// empty/inverted.
    pub(crate) fn slice_bytes(&self, range: Range<usize>) -> Vec<u8> {
        self.get_text_range(range.start, range.end.saturating_sub(range.start))
            .unwrap_or_default()
    }

    /// Whole document as a `String`; `None` when data is unavailable.
    /// (Deliberately not `Display`: the conversion can fail.)
    pub fn to_string(&self) -> Option<String> {
        self.get_all_text_string()
    }

    /// Document length in bytes (alias of `total_bytes`).
    pub fn len(&self) -> usize {
        self.total_bytes()
    }

    /// True when the document has no content.
    pub fn is_empty(&self) -> bool {
        self.total_bytes() == 0
    }

    /// Path of the backing file, if any.
    pub fn file_path(&self) -> Option<&Path> {
        self.file_path.as_deref()
    }

    /// Associates the buffer with a file path (does not save or mark modified).
    pub fn set_file_path(&mut self, path: PathBuf) {
        self.file_path = Some(path);
    }

    /// Detaches the buffer from its file path.
    pub fn clear_file_path(&mut self) {
        self.file_path = None;
    }
1499
    /// Appends the range `[old_size, new_size)` of `source_path` as an
    /// unloaded buffer — for when the underlying file grew (e.g. while
    /// following a file being written). No-op when the file did not grow.
    ///
    /// The new piece's line-feed count is `None` (unknown until loaded).
    /// NOTE(review): this does not bump `version` or the modified flag —
    /// verify callers expect that.
    pub fn extend_streaming(&mut self, source_path: &Path, new_size: usize) {
        let old_size = self.total_bytes();
        if new_size <= old_size {
            return;
        }

        let additional_bytes = new_size - old_size;

        let buffer_id = self.next_buffer_id;
        self.next_buffer_id += 1;

        // The buffer references the file's tail without reading it.
        let new_buffer = StringBuffer::new_unloaded(
            buffer_id,
            source_path.to_path_buf(),
            old_size,
            additional_bytes,
        );
        self.buffers.push(new_buffer);

        self.piece_tree.insert(
            old_size,
            BufferLocation::Stored(buffer_id),
            0,
            additional_bytes,
            None,
            &self.buffers,
        );
    }
1533
    /// Returns `true` if the buffer has unsaved modifications.
    pub fn is_modified(&self) -> bool {
        self.modified
    }
1538
    /// Clears the modified flag (e.g. after a successful save).
    pub fn clear_modified(&mut self) {
        self.modified = false;
    }
1543
    /// Sets the modified flag explicitly.
    pub fn set_modified(&mut self, modified: bool) {
        self.modified = modified;
    }
1549
    /// Whether a recovery action is still pending for this buffer.
    pub fn is_recovery_pending(&self) -> bool {
        self.recovery_pending
    }
1554
    /// Sets the recovery-pending flag.
    pub fn set_recovery_pending(&mut self, pending: bool) {
        self.recovery_pending = pending;
    }
1559
    /// Returns `true` if this buffer was opened in large-file mode.
    pub fn is_large_file(&self) -> bool {
        self.large_file
    }
1564
    /// The file size recorded in `saved_file_size`, if known.
    ///
    /// NOTE(review): presumably updated on load/save — confirm against the
    /// setter sites, which are outside this view.
    pub fn original_file_size(&self) -> Option<usize> {
        self.saved_file_size
    }
1573
    /// Collects the modified (added) regions of the document for recovery.
    ///
    /// Returns `(offset, bytes)` pairs where `offset` counts only the
    /// *stored* (original-file) bytes preceding the chunk — i.e. where the
    /// added text applies relative to the on-disk file, not the current
    /// document layout.
    pub fn get_recovery_chunks(&self) -> Vec<(usize, Vec<u8>)> {
        use crate::model::piece_tree::BufferLocation;

        let mut chunks = Vec::new();
        let total = self.total_bytes();

        // Running count of original-file bytes seen so far.
        let mut stored_bytes_before = 0;

        for piece in self.piece_tree.iter_pieces_in_range(0, total) {
            match piece.location {
                BufferLocation::Stored(_) => {
                    stored_bytes_before += piece.bytes;
                }
                BufferLocation::Added(buffer_id) => {
                    if let Some(buffer) = self.buffers.iter().find(|b| b.id == buffer_id) {
                        if let Some(data) = buffer.get_data() {
                            let start = piece.buffer_offset;
                            let end = start + piece.bytes;
                            // Skip pieces whose backing data is truncated.
                            if end <= data.len() {
                                chunks.push((stored_bytes_before, data[start..end].to_vec()));
                            }
                        }
                    }
                }
            }
        }

        chunks
    }
1622
    /// Returns `true` if the content was flagged as binary.
    pub fn is_binary(&self) -> bool {
        self.is_binary
    }
1627
    /// The line ending style currently used by this buffer.
    pub fn line_ending(&self) -> LineEnding {
        self.line_ending
    }
1632
    /// Changes the buffer's line ending style and marks the content as
    /// modified, so the change is persisted on the next save.
    pub fn set_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.mark_content_modified();
    }
1641
    /// Sets both the current and the original line ending. Unlike
    /// `set_line_ending`, this does NOT mark the buffer as modified.
    pub fn set_default_line_ending(&mut self, line_ending: LineEnding) {
        self.line_ending = line_ending;
        self.original_line_ending = line_ending;
    }
1650
1651 pub fn detect_binary(bytes: &[u8]) -> bool {
1659 let check_len = bytes.len().min(8 * 1024);
1661 let sample = &bytes[..check_len];
1662
1663 let mut i = 0;
1664 while i < sample.len() {
1665 let byte = sample[i];
1666
1667 if byte == 0x1B && i + 1 < sample.len() {
1670 let next = sample[i + 1];
1671 if next == b'[' || next == b']' {
1672 i += 2;
1674 while i < sample.len() {
1675 let c = sample[i];
1676 if (0x40..=0x7E).contains(&c) {
1678 break;
1679 }
1680 i += 1;
1681 }
1682 i += 1;
1683 continue;
1684 }
1685 }
1686
1687 if byte == 0x00 {
1689 return true;
1690 }
1691
1692 if byte < 0x20
1697 && byte != 0x09
1698 && byte != 0x0A
1699 && byte != 0x0D
1700 && byte != 0x0C
1701 && byte != 0x0B
1702 && byte != 0x1B
1703 {
1704 return true;
1705 }
1706
1707 if byte == 0x7F {
1709 return true;
1710 }
1711
1712 i += 1;
1713 }
1714
1715 false
1716 }
1717
1718 pub fn detect_line_ending(bytes: &[u8]) -> LineEnding {
1723 let check_len = bytes.len().min(8 * 1024);
1725 let sample = &bytes[..check_len];
1726
1727 let mut crlf_count = 0;
1728 let mut lf_only_count = 0;
1729 let mut cr_only_count = 0;
1730
1731 let mut i = 0;
1732 while i < sample.len() {
1733 if sample[i] == b'\r' {
1734 if i + 1 < sample.len() && sample[i + 1] == b'\n' {
1736 crlf_count += 1;
1737 i += 2; continue;
1739 } else {
1740 cr_only_count += 1;
1742 }
1743 } else if sample[i] == b'\n' {
1744 lf_only_count += 1;
1746 }
1747 i += 1;
1748 }
1749
1750 if crlf_count > lf_only_count && crlf_count > cr_only_count {
1752 LineEnding::CRLF
1753 } else if cr_only_count > lf_only_count && cr_only_count > crlf_count {
1754 LineEnding::CR
1755 } else {
1756 LineEnding::LF
1758 }
1759 }
1760
1761 #[allow(dead_code)] pub fn normalize_line_endings(bytes: Vec<u8>) -> Vec<u8> {
1768 let mut normalized = Vec::with_capacity(bytes.len());
1769 let mut i = 0;
1770
1771 while i < bytes.len() {
1772 if bytes[i] == b'\r' {
1773 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
1775 normalized.push(b'\n');
1777 i += 2; continue;
1779 } else {
1780 normalized.push(b'\n');
1782 }
1783 } else {
1784 normalized.push(bytes[i]);
1786 }
1787 i += 1;
1788 }
1789
1790 normalized
1791 }
1792
1793 fn convert_line_endings_to(bytes: &[u8], target_ending: LineEnding) -> Vec<u8> {
1798 let mut normalized = Vec::with_capacity(bytes.len());
1800 let mut i = 0;
1801 while i < bytes.len() {
1802 if bytes[i] == b'\r' {
1803 if i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
1805 normalized.push(b'\n');
1807 i += 2;
1808 continue;
1809 } else {
1810 normalized.push(b'\n');
1812 }
1813 } else {
1814 normalized.push(bytes[i]);
1815 }
1816 i += 1;
1817 }
1818
1819 if target_ending == LineEnding::LF {
1821 return normalized;
1822 }
1823
1824 let replacement = target_ending.as_str().as_bytes();
1826 let mut result = Vec::with_capacity(normalized.len() + normalized.len() / 10);
1827
1828 for byte in normalized {
1829 if byte == b'\n' {
1830 result.extend_from_slice(replacement);
1831 } else {
1832 result.push(byte);
1833 }
1834 }
1835
1836 result
1837 }
1838
1839 pub fn get_line(&self, line: usize) -> Option<Vec<u8>> {
1841 let (start, end) = self.piece_tree.line_range(line, &self.buffers)?;
1842
1843 let bytes = if let Some(end_offset) = end {
1844 end_offset.saturating_sub(start)
1845 } else {
1846 self.total_bytes().saturating_sub(start)
1847 };
1848
1849 self.get_text_range(start, bytes)
1850 }
1851
1852 pub fn line_start_offset(&self, line: usize) -> Option<usize> {
1854 let (start, _) = self.piece_tree.line_range(line, &self.buffers)?;
1855 Some(start)
1856 }
1857
    /// Returns piece-tree metadata for the piece containing `offset`.
    pub fn piece_info_at_offset(&self, offset: usize) -> Option<PieceInfo> {
        self.piece_tree.find_by_offset(offset)
    }
1862
    /// Returns aggregate statistics about the underlying piece tree.
    pub fn stats(&self) -> TreeStats {
        self.piece_tree.stats()
    }
1867
1868 pub fn find_next(&self, pattern: &str, start_pos: usize) -> Option<usize> {
1872 if pattern.is_empty() {
1873 return None;
1874 }
1875
1876 let pattern_bytes = pattern.as_bytes();
1877 let buffer_len = self.len();
1878
1879 if start_pos < buffer_len {
1881 if let Some(offset) = self.find_pattern(start_pos, buffer_len, pattern_bytes) {
1882 return Some(offset);
1883 }
1884 }
1885
1886 if start_pos > 0 {
1888 if let Some(offset) = self.find_pattern(0, start_pos, pattern_bytes) {
1889 return Some(offset);
1890 }
1891 }
1892
1893 None
1894 }
1895
1896 pub fn find_next_in_range(
1900 &self,
1901 pattern: &str,
1902 start_pos: usize,
1903 range: Option<Range<usize>>,
1904 ) -> Option<usize> {
1905 if pattern.is_empty() {
1906 return None;
1907 }
1908
1909 if let Some(search_range) = range {
1910 let pattern_bytes = pattern.as_bytes();
1912 let search_start = start_pos.max(search_range.start);
1913 let search_end = search_range.end.min(self.len());
1914
1915 if search_start < search_end {
1916 self.find_pattern(search_start, search_end, pattern_bytes)
1917 } else {
1918 None
1919 }
1920 } else {
1921 self.find_next(pattern, start_pos)
1923 }
1924 }
1925
    /// Searches `[start, end)` for `pattern`, streaming the document through
    /// fixed-size chunks that overlap by `pattern.len() - 1` bytes so matches
    /// spanning a chunk boundary are not missed.
    fn find_pattern(&self, start: usize, end: usize, pattern: &[u8]) -> Option<usize> {
        if pattern.is_empty() || start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 65536;
        // len-1 overlap guarantees a boundary-straddling match appears whole
        // in the next chunk (min 1 keeps the iterator well-formed).
        let overlap = pattern.len().saturating_sub(1).max(1);

        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, overlap);

        for chunk in chunks {
            if let Some(pos) = Self::find_in_bytes(&chunk.buffer, pattern) {
                let match_end = pos + pattern.len();
                // Matches ending inside the overlap prefix were already
                // reported by the previous chunk; skip them.
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + pos;
                    // Don't report matches poking past the search range.
                    if absolute_pos + pattern.len() <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
1956
1957 fn find_in_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
1959 if needle.is_empty() || needle.len() > haystack.len() {
1960 return None;
1961 }
1962
1963 (0..=haystack.len() - needle.len()).find(|&i| &haystack[i..i + needle.len()] == needle)
1964 }
1965
1966 pub fn find_next_regex(&self, regex: &Regex, start_pos: usize) -> Option<usize> {
1968 let buffer_len = self.len();
1969
1970 if start_pos < buffer_len {
1972 if let Some(offset) = self.find_regex(start_pos, buffer_len, regex) {
1973 return Some(offset);
1974 }
1975 }
1976
1977 if start_pos > 0 {
1979 if let Some(offset) = self.find_regex(0, start_pos, regex) {
1980 return Some(offset);
1981 }
1982 }
1983
1984 None
1985 }
1986
1987 pub fn find_next_regex_in_range(
1989 &self,
1990 regex: &Regex,
1991 start_pos: usize,
1992 range: Option<Range<usize>>,
1993 ) -> Option<usize> {
1994 if let Some(search_range) = range {
1995 let search_start = start_pos.max(search_range.start);
1996 let search_end = search_range.end.min(self.len());
1997
1998 if search_start < search_end {
1999 self.find_regex(search_start, search_end, regex)
2000 } else {
2001 None
2002 }
2003 } else {
2004 self.find_next_regex(regex, start_pos)
2005 }
2006 }
2007
    /// Searches `[start, end)` for a regex match, streaming the document in
    /// 1 MiB chunks with a fixed 4 KiB overlap.
    ///
    /// NOTE(review): a match longer than the 4 KiB overlap that straddles a
    /// chunk boundary can be missed — confirm whether callers can hit this.
    fn find_regex(&self, start: usize, end: usize, regex: &Regex) -> Option<usize> {
        if start >= end {
            return None;
        }

        const CHUNK_SIZE: usize = 1048576;
        const OVERLAP: usize = 4096;
        let chunks = OverlappingChunks::new(self, start, end, CHUNK_SIZE, OVERLAP);

        for chunk in chunks {
            if let Some(mat) = regex.find(&chunk.buffer) {
                let match_end = mat.end();
                // Matches ending inside the overlap were already considered
                // by the previous chunk; skip them.
                if match_end > chunk.valid_start {
                    let absolute_pos = chunk.absolute_pos + mat.start();
                    let match_len = mat.end() - mat.start();
                    // Don't report matches extending past the search range.
                    if absolute_pos + match_len <= end {
                        return Some(absolute_pos);
                    }
                }
            }
        }

        None
    }
2040
2041 pub fn replace_range(&mut self, range: Range<usize>, replacement: &str) -> bool {
2043 if range.start >= self.len() {
2044 return false;
2045 }
2046
2047 let end = range.end.min(self.len());
2048 if end > range.start {
2049 self.delete_bytes(range.start, end - range.start);
2050 }
2051
2052 if !replacement.is_empty() {
2053 self.insert(range.start, replacement);
2054 }
2055
2056 true
2057 }
2058
2059 pub fn replace_next(
2061 &mut self,
2062 pattern: &str,
2063 replacement: &str,
2064 start_pos: usize,
2065 range: Option<Range<usize>>,
2066 ) -> Option<usize> {
2067 if let Some(pos) = self.find_next_in_range(pattern, start_pos, range.clone()) {
2068 self.replace_range(pos..pos + pattern.len(), replacement);
2069 Some(pos)
2070 } else {
2071 None
2072 }
2073 }
2074
2075 pub fn replace_all(&mut self, pattern: &str, replacement: &str) -> usize {
2077 if pattern.is_empty() {
2078 return 0;
2079 }
2080
2081 let mut count = 0;
2082 let mut pos = 0;
2083
2084 while let Some(found_pos) = self.find_next_in_range(pattern, pos, Some(0..self.len())) {
2088 self.replace_range(found_pos..found_pos + pattern.len(), replacement);
2089 count += 1;
2090
2091 pos = found_pos + replacement.len();
2093
2094 if pos >= self.len() {
2096 break;
2097 }
2098 }
2099
2100 count
2101 }
2102
2103 pub fn replace_all_regex(&mut self, regex: &Regex, replacement: &str) -> Result<usize> {
2105 let mut count = 0;
2106 let mut pos = 0;
2107
2108 while let Some(found_pos) = self.find_next_regex_in_range(regex, pos, Some(0..self.len())) {
2109 let text = self
2111 .get_text_range_mut(found_pos, self.len() - found_pos)
2112 .context("Failed to read text for regex match")?;
2113
2114 if let Some(mat) = regex.find(&text) {
2115 self.replace_range(found_pos..found_pos + mat.len(), replacement);
2116 count += 1;
2117 pos = found_pos + replacement.len();
2118
2119 if pos >= self.len() {
2120 break;
2121 }
2122 } else {
2123 break;
2124 }
2125 }
2126
2127 Ok(count)
2128 }
2129
    /// Converts a byte offset into a 0-based `(line, column)` pair, with
    /// `column` measured in bytes.
    pub fn position_to_line_col(&self, byte_pos: usize) -> (usize, usize) {
        self.offset_to_position(byte_pos)
            .map(|pos| (pos.line, pos.column))
            // Heuristic fallback (assumed 80-byte lines) when the offset
            // cannot be resolved exactly.
            .unwrap_or_else(|| (byte_pos / 80, 0))
    }
2138
2139 pub fn line_col_to_position(&self, line: usize, character: usize) -> usize {
2143 if let Some((start, end)) = self.piece_tree.line_range(line, &self.buffers) {
2144 let line_len = if let Some(end_offset) = end {
2146 end_offset.saturating_sub(start)
2147 } else {
2148 self.total_bytes().saturating_sub(start)
2149 };
2150 let byte_offset = character.min(line_len);
2151 start + byte_offset
2152 } else {
2153 self.len()
2155 }
2156 }
2157
    /// Converts a byte offset into an LSP-style `(line, character)` pair
    /// where `character` is measured in UTF-16 code units, as the Language
    /// Server Protocol requires.
    pub fn position_to_lsp_position(&self, byte_pos: usize) -> (usize, usize) {
        let (line, column_bytes) = self
            .offset_to_position(byte_pos)
            .map(|pos| (pos.line, pos.column))
            // Heuristic fallback (assumed 80-byte lines).
            .unwrap_or_else(|| (byte_pos / 80, 0));

        if let Some(line_bytes) = self.get_line(line) {
            // Re-measure the byte-column prefix in UTF-16 code units.
            let text_before = &line_bytes[..column_bytes.min(line_bytes.len())];
            let text_str = String::from_utf8_lossy(text_before);
            let utf16_offset = text_str.encode_utf16().count();
            (line, utf16_offset)
        } else {
            (line, 0)
        }
    }
2177
    /// Converts an LSP `(line, utf16_offset)` position into a byte offset.
    ///
    /// Walks the line's characters accumulating UTF-16 units until the
    /// requested offset is reached, returning the matching UTF-8 byte
    /// offset. Out-of-range lines map to the end of the buffer.
    pub fn lsp_position_to_byte(&self, line: usize, utf16_offset: usize) -> usize {
        if let Some((line_start, end)) = self.piece_tree.line_range(line, &self.buffers) {
            let line_len = if let Some(end_offset) = end {
                end_offset.saturating_sub(line_start)
            } else {
                // Last line without a trailing newline: runs to end of buffer.
                self.total_bytes().saturating_sub(line_start)
            };

            if line_len > 0 {
                let Some(line_bytes) = self.get_text_range(line_start, line_len) else {
                    // Data not loaded: best effort, return the line start.
                    return line_start;
                };
                let line_str = String::from_utf8_lossy(&line_bytes);

                // Track UTF-16 units and UTF-8 bytes in lockstep.
                let mut utf16_count = 0;
                let mut byte_offset = 0;

                for ch in line_str.chars() {
                    if utf16_count >= utf16_offset {
                        break;
                    }
                    utf16_count += ch.len_utf16();
                    byte_offset += ch.len_utf8();
                }

                line_start + byte_offset
            } else {
                line_start
            }
        } else {
            self.len()
        }
    }
2218
2219 pub fn prev_char_boundary(&self, pos: usize) -> usize {
2223 if pos == 0 {
2224 return 0;
2225 }
2226
2227 let start = pos.saturating_sub(4);
2229 let Some(bytes) = self.get_text_range(start, pos - start) else {
2230 return pos;
2232 };
2233
2234 for i in (0..bytes.len()).rev() {
2236 let byte = bytes[i];
2237 if (byte & 0b1100_0000) != 0b1000_0000 {
2239 return start + i;
2240 }
2241 }
2242
2243 pos.saturating_sub(1)
2245 }
2246
2247 pub fn next_char_boundary(&self, pos: usize) -> usize {
2249 let len = self.len();
2250 if pos >= len {
2251 return len;
2252 }
2253
2254 let end = (pos + 5).min(len);
2256 let Some(bytes) = self.get_text_range(pos, end - pos) else {
2257 return pos;
2259 };
2260
2261 for (i, &byte) in bytes.iter().enumerate().skip(1) {
2263 if (byte & 0b1100_0000) != 0b1000_0000 {
2265 return pos + i;
2266 }
2267 }
2268
2269 end
2271 }
2272
    /// Returns `true` for UTF-8 continuation bytes (`0b10xx_xxxx`), i.e.
    /// bytes that can never start a character.
    #[inline]
    fn is_utf8_continuation_byte(byte: u8) -> bool {
        (byte & 0b1100_0000) == 0b1000_0000
    }
2280
2281 pub fn snap_to_char_boundary(&self, pos: usize) -> usize {
2285 let len = self.len();
2286 if pos == 0 || pos >= len {
2287 return pos.min(len);
2288 }
2289
2290 let Some(bytes) = self.get_text_range(pos, 1) else {
2292 return pos;
2294 };
2295
2296 if !Self::is_utf8_continuation_byte(bytes[0]) {
2298 return pos;
2300 }
2301
2302 self.prev_char_boundary(pos)
2304 }
2305
    /// Returns the byte offset of the previous grapheme-cluster boundary
    /// before `pos`, falling back to plain char boundaries when the
    /// surrounding bytes cannot be decoded as UTF-8.
    pub fn prev_grapheme_boundary(&self, pos: usize) -> usize {
        if pos == 0 {
            return 0;
        }

        // Look at up to 32 bytes of context; if the cluster extends further
        // back we recurse with an earlier window below.
        let raw_start = pos.saturating_sub(32);
        // Snap the window start onto a char boundary so the slice decodes.
        let start = if raw_start == 0 {
            0
        } else {
            self.prev_char_boundary(raw_start + 1)
        };

        let Some(bytes) = self.get_text_range(start, pos - start) else {
            return self.prev_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Use the valid UTF-8 prefix if there is one; otherwise fall
                // back to char-boundary stepping.
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.prev_char_boundary(pos),
                }
            }
        };

        let rel_pos = pos - start;
        let new_rel_pos = grapheme::prev_grapheme_boundary(text, rel_pos);

        // Landing exactly on the window start may mean the cluster begins
        // even earlier; retry from there.
        if new_rel_pos == 0 && start > 0 {
            return self.prev_grapheme_boundary(start);
        }

        start + new_rel_pos
    }
2358
    /// Returns the byte offset of the next grapheme-cluster boundary after
    /// `pos`, falling back to plain char boundaries on undecodable bytes.
    pub fn next_grapheme_boundary(&self, pos: usize) -> usize {
        let len = self.len();
        if pos >= len {
            return len;
        }

        // 32 bytes of lookahead for the cluster at `pos`.
        let end = (pos + 32).min(len);
        let Some(bytes) = self.get_text_range(pos, end - pos) else {
            return self.next_char_boundary(pos);
        };

        let text = match std::str::from_utf8(&bytes) {
            Ok(s) => s,
            Err(e) => {
                // Use the valid UTF-8 prefix if any; otherwise fall back to
                // char-boundary stepping.
                let valid_bytes = &bytes[..e.valid_up_to()];
                match std::str::from_utf8(valid_bytes) {
                    Ok(s) if !s.is_empty() => s,
                    _ => return self.next_char_boundary(pos),
                }
            }
        };

        let new_rel_pos = grapheme::next_grapheme_boundary(text, 0);
        pos + new_rel_pos
    }
2397
2398 pub fn prev_word_boundary(&self, pos: usize) -> usize {
2400 if pos == 0 {
2401 return 0;
2402 }
2403
2404 let start = pos.saturating_sub(256).max(0);
2406 let Some(bytes) = self.get_text_range(start, pos - start) else {
2407 return pos;
2409 };
2410 let text = String::from_utf8_lossy(&bytes);
2411
2412 let mut found_word_char = false;
2413 let chars: Vec<char> = text.chars().collect();
2414
2415 for i in (0..chars.len()).rev() {
2416 let ch = chars[i];
2417 let is_word_char = ch.is_alphanumeric() || ch == '_';
2418
2419 if found_word_char && !is_word_char {
2420 let byte_offset: usize = chars[0..=i].iter().map(|c| c.len_utf8()).sum();
2423 return start + byte_offset;
2424 }
2425
2426 if is_word_char {
2427 found_word_char = true;
2428 }
2429 }
2430
2431 0
2432 }
2433
2434 pub fn next_word_boundary(&self, pos: usize) -> usize {
2436 let len = self.len();
2437 if pos >= len {
2438 return len;
2439 }
2440
2441 let end = (pos + 256).min(len);
2443 let Some(bytes) = self.get_text_range(pos, end - pos) else {
2444 return pos;
2446 };
2447 let text = String::from_utf8_lossy(&bytes);
2448
2449 let mut found_word_char = false;
2450 let mut byte_offset = 0;
2451
2452 for ch in text.chars() {
2453 let is_word_char = ch.is_alphanumeric() || ch == '_';
2454
2455 if found_word_char && !is_word_char {
2456 return pos + byte_offset;
2458 }
2459
2460 if is_word_char {
2461 found_word_char = true;
2462 }
2463
2464 byte_offset += ch.len_utf8();
2465 }
2466
2467 len
2468 }
2469
    /// Returns a streaming iterator over lines starting at `byte_pos`.
    /// `estimated_line_length` sizes the iterator's internal read-ahead.
    pub fn line_iterator(
        &mut self,
        byte_pos: usize,
        estimated_line_length: usize,
    ) -> LineIterator<'_> {
        LineIterator::new(self, byte_pos, estimated_line_length)
    }
2481
    /// Returns an iterator yielding at most `max_lines` lines starting at
    /// `byte_pos`.
    ///
    /// # Errors
    ///
    /// Propagates any error from constructing the iterator.
    pub fn iter_lines_from(
        &mut self,
        byte_pos: usize,
        max_lines: usize,
    ) -> Result<TextBufferLineIterator> {
        TextBufferLineIterator::new(self, byte_pos, max_lines)
    }
2502
    /// Returns the 0-based line number containing `byte_offset`.
    pub fn get_line_number(&self, byte_offset: usize) -> usize {
        self.offset_to_position(byte_offset)
            .map(|pos| pos.line)
            .unwrap_or_else(|| {
                // Heuristic fallback assuming ~80-byte lines when exact line
                // information is unavailable.
                byte_offset / 80
            })
    }
2523
    /// Resolves the line number at `start_byte`.
    ///
    /// NOTE(review): appears to be a legacy line-cache shim — `_line_count`
    /// is ignored and no cache is populated; line starts now come straight
    /// from the piece tree.
    pub fn populate_line_cache(&mut self, start_byte: usize, _line_count: usize) -> usize {
        self.get_line_number(start_byte)
    }
2562
    /// Returns the byte offset where `line_number` starts.
    ///
    /// NOTE(review): "cached" in the name is legacy — this is a direct
    /// piece-tree lookup via `line_start_offset`.
    pub fn get_cached_byte_offset_for_line(&self, line_number: usize) -> Option<usize> {
        self.line_start_offset(line_number)
    }
2567
    /// No-op: line positions are derived from the piece tree, so there is no
    /// separate cache to invalidate. Kept for API compatibility.
    pub fn invalidate_line_cache_from(&mut self, _byte_offset: usize) {
    }
2572
    /// No-op: insertions already update the piece tree directly. Kept for
    /// API compatibility with the former line cache.
    pub fn handle_line_cache_insertion(&mut self, _byte_offset: usize, _bytes_inserted: usize) {
    }
2577
    /// No-op: deletions already update the piece tree directly. Kept for
    /// API compatibility with the former line cache.
    pub fn handle_line_cache_deletion(&mut self, _byte_offset: usize, _bytes_deleted: usize) {
    }
2582
    /// No-op: kept for API compatibility with the former line cache.
    pub fn clear_line_cache(&mut self) {
    }
2587
    /// Test helper: builds a buffer from a string slice.
    #[cfg(test)]
    pub fn from_str_test(s: &str) -> Self {
        Self::from_bytes(s.as_bytes().to_vec())
    }
2595
    /// Test helper: creates an empty buffer.
    #[cfg(test)]
    pub fn new_test() -> Self {
        Self::empty()
    }
2601}
2602
2603pub type Buffer = TextBuffer;
2605
2606pub use crate::primitives::line_iterator::LineIterator;
2608
/// A chunk of document bytes produced by [`OverlappingChunks`], carrying the
/// bookkeeping needed to de-duplicate matches found in overlap regions.
#[derive(Debug)]
pub struct ChunkInfo {
    /// The chunk's bytes (overlap prefix followed by fresh data).
    pub buffer: Vec<u8>,

    /// Document offset of `buffer[0]`.
    pub absolute_pos: usize,

    /// Index into `buffer` where "fresh" (not previously seen) data begins;
    /// matches ending at or before this index were already reported by the
    /// previous chunk.
    pub valid_start: usize,
}
2626
/// Streams a byte range of a `TextBuffer` as fixed-size chunks where each
/// chunk after the first repeats the last `overlap` bytes of its
/// predecessor, so matches spanning a chunk boundary are not missed.
pub struct OverlappingChunks<'a> {
    /// Iterator over the pieces covering the requested range.
    piece_iter: PieceRangeIter,
    buffers: &'a [StringBuffer],

    /// The current chunk contents (overlap prefix + fresh bytes).
    buffer: Vec<u8>,
    /// Document offset of `buffer[0]`.
    buffer_absolute_pos: usize,

    /// Next document offset to read from.
    current_pos: usize,
    /// Exclusive end of the requested range.
    end_pos: usize,

    /// Number of fresh bytes per chunk.
    chunk_size: usize,
    /// Bytes carried over between consecutive chunks.
    overlap: usize,

    /// True until the first chunk has been produced.
    first_chunk: bool,

    /// Bytes of the piece currently being consumed, if any.
    current_piece_data: Option<Vec<u8>>,
    /// Read cursor within `current_piece_data`.
    current_piece_offset: usize,
}
2677
impl<'a> OverlappingChunks<'a> {
    /// Creates a chunk iterator over `[start, end)` of `text_buffer`.
    ///
    /// Each yielded chunk holds up to `chunk_size` fresh bytes; chunks after
    /// the first are prefixed with the previous chunk's last `overlap` bytes.
    pub fn new(
        text_buffer: &'a TextBuffer,
        start: usize,
        end: usize,
        chunk_size: usize,
        overlap: usize,
    ) -> Self {
        let piece_iter = text_buffer.piece_tree.iter_pieces_in_range(start, end);

        Self {
            piece_iter,
            buffers: &text_buffer.buffers,
            buffer: Vec::with_capacity(chunk_size + overlap),
            buffer_absolute_pos: start,
            current_pos: start,
            end_pos: end,
            chunk_size,
            overlap,
            first_chunk: true,
            current_piece_data: None,
            current_piece_offset: 0,
        }
    }

    /// Reads the next byte of the range, materializing piece data lazily.
    ///
    /// Returns `None` at the end of the range, or early if a piece's backing
    /// buffer has no loaded data (NOTE(review): that case silently ends the
    /// stream rather than skipping the piece — confirm this is intended).
    fn read_byte(&mut self) -> Option<u8> {
        loop {
            // Serve from the currently materialized piece if possible.
            if let Some(ref data) = self.current_piece_data {
                if self.current_piece_offset < data.len() {
                    let byte = data[self.current_piece_offset];
                    self.current_piece_offset += 1;
                    self.current_pos += 1;
                    return Some(byte);
                } else {
                    // Piece exhausted; fetch the next one below.
                    self.current_piece_data = None;
                    self.current_piece_offset = 0;
                }
            }

            if let Some(piece_view) = self.piece_iter.next() {
                let buffer_id = piece_view.location.buffer_id();
                if let Some(buffer) = self.buffers.get(buffer_id) {
                    let piece_start_in_doc = piece_view.doc_offset;
                    let piece_end_in_doc = piece_view.doc_offset + piece_view.bytes;

                    // Clamp the piece to the requested document range.
                    let read_start = self.current_pos.max(piece_start_in_doc);
                    let read_end = self.end_pos.min(piece_end_in_doc);

                    if read_end > read_start {
                        let offset_in_piece = read_start - piece_start_in_doc;
                        let bytes_to_read = read_end - read_start;

                        let buffer_start = piece_view.buffer_offset + offset_in_piece;
                        let buffer_end = buffer_start + bytes_to_read;

                        // Only loaded buffers expose data; also guard
                        // against slices past the buffer's end.
                        if let Some(data) = buffer.get_data() {
                            if buffer_end <= data.len() {
                                self.current_piece_data =
                                    Some(data[buffer_start..buffer_end].to_vec());
                                self.current_piece_offset = 0;
                                continue;
                            }
                        }
                    }
                }
            }

            return None;
        }
    }

    /// Advances `self.buffer` to hold the next chunk. Returns `false` when
    /// no new bytes could be added (end of range or missing data).
    fn fill_next_chunk(&mut self) -> bool {
        if self.first_chunk {
            self.first_chunk = false;
            // First chunk: no overlap prefix, read up to chunk_size bytes.
            while self.buffer.len() < self.chunk_size && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }
            !self.buffer.is_empty()
        } else {
            if self.current_pos >= self.end_pos {
                return false;
            }

            // Keep only the trailing `overlap` bytes of the previous chunk.
            if self.buffer.len() > self.overlap {
                let drain_amount = self.buffer.len() - self.overlap;
                self.buffer.drain(0..drain_amount);
                self.buffer_absolute_pos += drain_amount;
            }

            // Append fresh bytes after the retained overlap prefix.
            let before_len = self.buffer.len();
            let target_len = self.overlap + self.chunk_size;
            while self.buffer.len() < target_len && self.current_pos < self.end_pos {
                if let Some(byte) = self.read_byte() {
                    self.buffer.push(byte);
                } else {
                    break;
                }
            }

            // Only report a chunk if it actually contains new data.
            self.buffer.len() > before_len
        }
    }
}
2813
impl<'a> Iterator for OverlappingChunks<'a> {
    type Item = ChunkInfo;

    fn next(&mut self) -> Option<Self::Item> {
        // Before any byte has been read, the chunk start equals the read
        // cursor; that identifies the very first chunk, which carries no
        // overlap prefix.
        let is_first = self.buffer_absolute_pos == self.current_pos;

        if !self.fill_next_chunk() {
            return None;
        }

        // Consumers must ignore matches ending at or before `valid_start`:
        // that region was already reported by the previous chunk.
        let valid_start = if is_first {
            0
        } else {
            self.overlap.min(self.buffer.len())
        };

        Some(ChunkInfo {
            buffer: self.buffer.clone(),
            absolute_pos: self.buffer_absolute_pos,
            valid_start,
        })
    }
}
2840
2841#[cfg(test)]
2842mod tests {
2843 use super::*;
2844
    #[test]
    fn test_empty_buffer() {
        let buffer = TextBuffer::empty();
        assert_eq!(buffer.total_bytes(), 0);
        // An empty buffer still reports a single (empty) line.
        assert_eq!(buffer.line_count(), Some(1));
    }
2851
    #[test]
    fn test_line_positions_multiline() {
        let buffer = TextBuffer::from_bytes(b"Hello\nNew Line\nWorld!".to_vec());

        assert_eq!(buffer.line_count(), Some(3));

        // Line starts: "Hello\n" (0), "New Line\n" (6), "World!" (15).
        assert_eq!(buffer.line_start_offset(0), Some(0));
        assert_eq!(buffer.line_start_offset(1), Some(6));
        assert_eq!(buffer.line_start_offset(2), Some(15));
        // Byte offsets map back to their containing line (the newline at
        // offset 5 still belongs to line 0).
        assert_eq!(buffer.offset_to_position(0).unwrap().line, 0);
        assert_eq!(buffer.offset_to_position(5).unwrap().line, 0);
        assert_eq!(buffer.offset_to_position(6).unwrap().line, 1);
        assert_eq!(buffer.offset_to_position(14).unwrap().line, 1);
        assert_eq!(buffer.offset_to_position(15).unwrap().line, 2);
        // (line, column) -> byte offset.
        assert_eq!(buffer.line_col_to_position(0, 5), 5);
        assert_eq!(buffer.line_col_to_position(1, 0), 6);
        assert_eq!(buffer.line_col_to_position(1, 8), 14);
        assert_eq!(buffer.line_col_to_position(2, 0), 15);
    }
2877
2878 #[test]
2879 fn test_new_from_content() {
2880 let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec());
2881 assert_eq!(buffer.total_bytes(), 11);
2882 assert_eq!(buffer.line_count(), Some(2));
2883 }
2884
2885 #[test]
2886 fn test_get_all_text() {
2887 let buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec());
2888 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld");
2889 }
2890
2891 #[test]
2892 fn test_insert_at_start() {
2893 let mut buffer = TextBuffer::from_bytes(b"world".to_vec());
2894 buffer.insert_bytes(0, b"hello ".to_vec());
2895
2896 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
2897 assert_eq!(buffer.total_bytes(), 11);
2898 }
2899
2900 #[test]
2901 fn test_insert_in_middle() {
2902 let mut buffer = TextBuffer::from_bytes(b"helloworld".to_vec());
2903 buffer.insert_bytes(5, b" ".to_vec());
2904
2905 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
2906 assert_eq!(buffer.total_bytes(), 11);
2907 }
2908
2909 #[test]
2910 fn test_insert_at_end() {
2911 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec());
2912 buffer.insert_bytes(5, b" world".to_vec());
2913
2914 assert_eq!(buffer.get_all_text().unwrap(), b"hello world");
2915 assert_eq!(buffer.total_bytes(), 11);
2916 }
2917
2918 #[test]
2919 fn test_insert_with_newlines() {
2920 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec());
2921 buffer.insert_bytes(5, b"\nworld\ntest".to_vec());
2922
2923 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nworld\ntest");
2924 assert_eq!(buffer.line_count(), Some(3));
2925 }
2926
2927 #[test]
2928 fn test_delete_from_start() {
2929 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec());
2930 buffer.delete_bytes(0, 6);
2931
2932 assert_eq!(buffer.get_all_text().unwrap(), b"world");
2933 assert_eq!(buffer.total_bytes(), 5);
2934 }
2935
2936 #[test]
2937 fn test_delete_from_middle() {
2938 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec());
2939 buffer.delete_bytes(5, 1);
2940
2941 assert_eq!(buffer.get_all_text().unwrap(), b"helloworld");
2942 assert_eq!(buffer.total_bytes(), 10);
2943 }
2944
2945 #[test]
2946 fn test_delete_from_end() {
2947 let mut buffer = TextBuffer::from_bytes(b"hello world".to_vec());
2948 buffer.delete_bytes(6, 5);
2949
2950 assert_eq!(buffer.get_all_text().unwrap(), b"hello ");
2951 assert_eq!(buffer.total_bytes(), 6);
2952 }
2953
    #[test]
    fn test_delete_with_newlines() {
        let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());
        // Delete "\nworld\n" (7 bytes starting at offset 5).
        buffer.delete_bytes(5, 7);
        assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
        assert_eq!(buffer.line_count(), Some(1));
    }
2962
2963 #[test]
2964 fn test_offset_position_conversions() {
2965 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());
2966
2967 let pos = buffer.offset_to_position(0);
2968 assert_eq!(pos, Some(Position { line: 0, column: 0 }));
2969
2970 let pos = buffer.offset_to_position(6);
2971 assert_eq!(pos, Some(Position { line: 1, column: 0 }));
2972
2973 let offset = buffer.position_to_offset(Position { line: 1, column: 0 });
2974 assert_eq!(offset, 6);
2975 }
2976
2977 #[test]
2978 fn test_insert_at_position() {
2979 let mut buffer = TextBuffer::from_bytes(b"hello\nworld".to_vec());
2980 buffer.insert_at_position(Position { line: 1, column: 0 }, b"beautiful ".to_vec());
2981
2982 assert_eq!(buffer.get_all_text().unwrap(), b"hello\nbeautiful world");
2983 }
2984
2985 #[test]
2986 fn test_delete_range() {
2987 let mut buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());
2988
2989 let start = Position { line: 0, column: 5 };
2990 let end = Position { line: 2, column: 0 };
2991 buffer.delete_range(start, end);
2992
2993 assert_eq!(buffer.get_all_text().unwrap(), b"hellotest");
2994 }
2995
2996 #[test]
2997 fn test_get_line() {
2998 let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());
2999
3000 assert_eq!(buffer.get_line(0), Some(b"hello\n".to_vec()));
3001 assert_eq!(buffer.get_line(1), Some(b"world\n".to_vec()));
3002 assert_eq!(buffer.get_line(2), Some(b"test".to_vec()));
3003 assert_eq!(buffer.get_line(3), None);
3004 }
3005
    #[test]
    fn test_multiple_operations() {
        let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec());

        buffer.insert_bytes(0, b"start\n".to_vec());
        assert_eq!(buffer.line_count(), Some(4));

        // Delete "line1\n" (6 bytes starting at offset 6).
        buffer.delete_bytes(6, 6);
        assert_eq!(buffer.line_count(), Some(3));

        buffer.insert_bytes(6, b"new\n".to_vec());
        assert_eq!(buffer.line_count(), Some(4));

        let text = buffer.get_all_text().unwrap();
        assert_eq!(text, b"start\nnew\nline2\nline3");
    }
3022
3023 #[test]
3024 fn test_get_text_range() {
3025 let buffer = TextBuffer::from_bytes(b"hello world".to_vec());
3026
3027 assert_eq!(buffer.get_text_range(0, 5), Some(b"hello".to_vec()));
3028 assert_eq!(buffer.get_text_range(6, 5), Some(b"world".to_vec()));
3029 assert_eq!(buffer.get_text_range(0, 11), Some(b"hello world".to_vec()));
3030 }
3031
3032 #[test]
3033 fn test_empty_operations() {
3034 let mut buffer = TextBuffer::from_bytes(b"hello".to_vec());
3035
3036 buffer.insert_bytes(2, Vec::new());
3037 assert_eq!(buffer.get_all_text().unwrap(), b"hello");
3038
3039 buffer.delete_bytes(2, 0);
3040 assert_eq!(buffer.get_all_text().unwrap(), b"hello");
3041 }
3042
    #[test]
    fn test_sequential_inserts_at_beginning() {
        let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec());

        // Empty the buffer entirely first.
        buffer.delete_bytes(0, 12);
        assert_eq!(buffer.get_all_text().unwrap(), b"");

        buffer.insert_bytes(0, vec![b'a']);
        assert_eq!(buffer.get_all_text().unwrap(), b"a");

        // A second insert at offset 0 must land before the first one.
        buffer.insert_bytes(0, vec![b'b']);
        assert_eq!(buffer.get_all_text().unwrap(), b"ba");
    }
3060
    /// Tests covering lazy (chunk-based) loading of large files and the
    /// loaded/unloaded states of `StringBuffer` that make it possible.
    mod large_file_support {
        use super::*;
        use crate::model::piece_tree::StringBuffer;
        use std::fs::File;
        use std::io::Write;
        use tempfile::TempDir;

        #[test]
        fn test_line_feed_count_is_some_for_loaded_buffer() {
            // An eagerly-constructed buffer scans its data, so the newline
            // count is known up front.
            let buffer = StringBuffer::new(0, b"hello\nworld\ntest".to_vec());
            assert_eq!(buffer.line_feed_count(), Some(2));
        }

        #[test]
        fn test_line_feed_count_is_none_for_unloaded_buffer() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            // An unloaded buffer has not read its backing file yet, so no
            // line metadata is available.
            let buffer = StringBuffer::new_unloaded(0, file_path, 0, 100);
            assert_eq!(buffer.line_feed_count(), None);
        }

        #[test]
        fn test_line_count_is_some_for_small_buffer() {
            // Two newlines => three lines.
            let buffer = TextBuffer::from_bytes(b"hello\nworld\ntest".to_vec());
            assert_eq!(buffer.line_count(), Some(3));
        }

        #[test]
        fn test_piece_tree_works_with_none_line_count() {
            // `new_loaded(.., false)` skips line-start indexing on purpose.
            let buffer = StringBuffer::new_loaded(0, b"hello\nworld".to_vec(), false);
            assert_eq!(buffer.line_feed_count(), None);

            use crate::model::piece_tree::{BufferLocation, PieceTree};
            // A tree built with `None` line-feed info must report an unknown
            // line count rather than guessing.
            let tree = PieceTree::new(BufferLocation::Stored(0), 0, 11, None);

            assert_eq!(tree.line_count(), None);
        }

        #[test]
        fn test_buffer_data_loaded_variant() {
            let data = b"hello world".to_vec();
            // `true` => compute line starts at construction time.
            let buffer = StringBuffer::new_loaded(0, data.clone(), true);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert!(buffer.get_line_starts().is_some());
        }

        #[test]
        fn test_buffer_data_loaded_without_line_starts() {
            let data = b"hello\nworld".to_vec();
            // `false` => data is resident but line starts were never computed.
            let buffer = StringBuffer::new_loaded(0, data.clone(), false);

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&data[..]));
            assert_eq!(buffer.get_line_starts(), None);
        }

        #[test]
        fn test_buffer_data_unloaded_variant() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            // Unloaded: no data, no line metadata until `load()` is called.
            let buffer = StringBuffer::new_unloaded(0, file_path.clone(), 0, 100);

            assert!(!buffer.is_loaded());
            assert_eq!(buffer.get_data(), None);
            assert_eq!(buffer.get_line_starts(), None);
        }

        #[test]
        fn test_buffer_load_method() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            let test_data = b"hello world";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            let mut buffer = StringBuffer::new_unloaded(0, file_path, 0, test_data.len());
            assert!(!buffer.is_loaded());

            // `load` reads the backing byte range from disk and flips the
            // buffer into the loaded state.
            buffer.load().unwrap();

            assert!(buffer.is_loaded());
            assert_eq!(buffer.get_data(), Some(&test_data[..]));
        }

        #[test]
        fn test_string_buffer_new_vs_new_loaded() {
            let data = b"hello\nworld".to_vec();

            // `new` always indexes line starts...
            let buf1 = StringBuffer::new(0, data.clone());
            assert!(buf1.is_loaded());
            assert!(buf1.get_line_starts().is_some());
            assert_eq!(buf1.line_feed_count(), Some(1));

            // ...while `new_loaded(.., false)` keeps the data but skips the
            // line-start index entirely.
            let buf2 = StringBuffer::new_loaded(0, data.clone(), false);
            assert!(buf2.is_loaded());
            assert_eq!(buf2.get_line_starts(), None);
            assert_eq!(buf2.line_feed_count(), None);
        }

        #[test]
        fn test_load_small_file_eager_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("small.txt");

            let test_data = b"hello\ntest";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // Threshold 0 falls back to the default threshold (see
            // test_large_file_default_threshold), so this tiny file is
            // loaded eagerly.
            let buffer = TextBuffer::load_from_file(&file_path, 0).unwrap();

            assert!(!buffer.large_file);
            assert_eq!(buffer.total_bytes(), test_data.len());
            assert_eq!(buffer.line_count(), Some(2));
            assert_eq!(buffer.get_all_text().unwrap(), test_data);

            // Eager loading leaves the backing buffer resident.
            assert!(buffer.buffers[0].is_loaded());
        }

        #[test]
        fn test_load_large_file_lazy_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large.txt");

            let test_data = b"hello\nworld\ntest";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            // 16-byte file against a 10-byte threshold => large-file mode.
            let buffer = TextBuffer::load_from_file(&file_path, 10).unwrap();

            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), test_data.len());

            // Line metadata is unknown until content is actually loaded.
            assert_eq!(buffer.line_count(), None);

            // Nothing has been read from disk yet.
            assert!(!buffer.buffers[0].is_loaded());
            assert_eq!(buffer.buffers[0].get_data(), None);
        }

        #[test]
        fn test_issue_657_search_on_large_file_unloaded_buffer() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_search_test.txt");

            let test_data = b"line1\nline2\nSEARCH_TARGET\nline4\nline5";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 10).unwrap();

            assert!(buffer.large_file, "Buffer should be in large file mode");
            assert!(
                !buffer.buffers[0].is_loaded(),
                "Buffer should be unloaded initially"
            );

            // The immutable accessor cannot fault content in, so it reports
            // None rather than partial text.
            assert!(
                buffer.to_string().is_none(),
                "BUG REPRODUCED: to_string() returns None for unloaded buffer"
            );

            // The mutable accessor lazily loads the requested range.
            let total_bytes = buffer.len();
            let content = buffer.get_text_range_mut(0, total_bytes).unwrap();
            let content_str = String::from_utf8_lossy(&content);

            assert!(
                content_str.contains("SEARCH_TARGET"),
                "FIX WORKS: get_text_range_mut() loaded the buffer and found the search target"
            );

            // Once loaded, the immutable accessor works too.
            assert!(
                buffer.to_string().is_some(),
                "After get_text_range_mut(), to_string() should work"
            );
        }

        #[test]
        fn test_large_file_threshold_boundary() {
            let temp_dir = TempDir::new().unwrap();

            // Exactly at the threshold: treated as large.
            let file_path = temp_dir.path().join("at_threshold.txt");
            let test_data = vec![b'x'; 100];
            File::create(&file_path)
                .unwrap()
                .write_all(&test_data)
                .unwrap();

            let buffer = TextBuffer::load_from_file(&file_path, 100).unwrap();
            assert!(buffer.large_file);

            // One byte below the threshold: loaded eagerly.
            let file_path2 = temp_dir.path().join("below_threshold.txt");
            let test_data2 = vec![b'x'; 99];
            File::create(&file_path2)
                .unwrap()
                .write_all(&test_data2)
                .unwrap();

            let buffer2 = TextBuffer::load_from_file(&file_path2, 100).unwrap();
            assert!(!buffer2.large_file);
        }

        #[test]
        fn test_large_file_default_threshold() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test.txt");

            File::create(&file_path)
                .unwrap()
                .write_all(b"hello")
                .unwrap();

            // A threshold of 0 must select the default threshold, not "every
            // file is large" — a 5-byte file stays in normal mode.
            let buffer = TextBuffer::load_from_file(&file_path, 0).unwrap();

            assert!(!buffer.large_file);
        }

        #[test]
        fn test_large_file_has_correct_piece_tree_structure() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large.txt");

            let test_data = b"hello world";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            let buffer = TextBuffer::load_from_file(&file_path, 5).unwrap();

            assert_eq!(buffer.total_bytes(), test_data.len());

            // Lazy loading starts with a single unloaded backing buffer
            // spanning the whole file.
            assert_eq!(buffer.buffers.len(), 1);

            assert!(!buffer.buffers[0].is_loaded());
        }

        #[test]
        fn test_empty_large_file() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("empty.txt");

            // Zero-byte file must load without error.
            File::create(&file_path).unwrap();

            let buffer = TextBuffer::load_from_file(&file_path, 0).unwrap();

            assert_eq!(buffer.total_bytes(), 0);
            assert!(buffer.is_empty());
        }

        #[test]
        fn test_large_file_basic_api_operations() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_test.txt");

            let test_data = b"line1\nline2\nline3\nline4\n";
            File::create(&file_path)
                .unwrap()
                .write_all(test_data)
                .unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 10).unwrap();

            // Size metadata works without loading any content.
            assert!(buffer.large_file);
            assert_eq!(buffer.line_count(), None);
            assert_eq!(buffer.total_bytes(), test_data.len());
            assert!(!buffer.is_empty());
            assert_eq!(buffer.len(), test_data.len());

            // Ranged reads trigger lazy loading of the backing data.
            let range_result = buffer.get_text_range_mut(0, 5).unwrap();
            assert_eq!(range_result, b"line1");

            let range_result2 = buffer.get_text_range_mut(6, 5).unwrap();
            assert_eq!(range_result2, b"line2");

            let all_text = buffer.get_all_text().unwrap();
            assert_eq!(all_text, test_data);

            assert_eq!(buffer.slice_bytes(0..5), b"line1");

            // Edits work on the now-loaded buffer. "prefix_" is 7 bytes.
            buffer.insert_bytes(0, b"prefix_".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 7);
            assert!(buffer.is_modified());

            let text_after_insert = buffer.get_all_text().unwrap();
            assert_eq!(&text_after_insert[0..7], b"prefix_");
            assert_eq!(&text_after_insert[7..12], b"line1");

            // Deleting the prefix restores the original content.
            buffer.delete_bytes(0, 7);
            assert_eq!(buffer.total_bytes(), test_data.len());

            let text_after_delete = buffer.get_all_text().unwrap();
            assert_eq!(text_after_delete, test_data);

            // Appending at the very end.
            let end_offset = buffer.total_bytes();
            buffer.insert_bytes(end_offset, b"suffix".to_vec());
            assert_eq!(buffer.total_bytes(), test_data.len() + 6);

            let final_text = buffer.get_all_text().unwrap();
            assert!(final_text.ends_with(b"suffix"));
            assert_eq!(&final_text[0..test_data.len()], test_data);

            // Position conversions also work.
            let pos = buffer.offset_to_position(0).unwrap();
            assert_eq!(pos.column, 0);

            let offset = buffer.position_to_offset(Position { line: 0, column: 0 });
            assert_eq!(offset, 0);

            // replace_range returns whether the replacement was applied.
            let replace_result = buffer.replace_range(0..5, "START");
            assert!(replace_result);

            let text_after_replace = buffer.get_all_text().unwrap();
            assert!(text_after_replace.starts_with(b"START"));
        }

        #[test]
        fn test_large_file_chunk_based_loading() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("huge.txt");

            // Three LOAD_CHUNK_SIZE regions of distinct fill bytes so each
            // chunk is identifiable after loading.
            let chunk_size = LOAD_CHUNK_SIZE;
            let file_size = chunk_size * 3;
            let mut file = File::create(&file_path).unwrap();
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.write_all(&vec![b'C'; chunk_size]).unwrap();
            file.flush().unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 1).unwrap();

            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            assert!(!buffer.buffers[0].is_loaded());

            // Read from each region and verify its fill byte.
            let first_chunk_data = buffer.get_text_range_mut(0, 1024).unwrap();
            assert_eq!(first_chunk_data.len(), 1024);
            assert!(first_chunk_data.iter().all(|&b| b == b'A'));

            let second_chunk_data = buffer.get_text_range_mut(chunk_size, 1024).unwrap();
            assert_eq!(second_chunk_data.len(), 1024);
            assert!(second_chunk_data.iter().all(|&b| b == b'B'));

            let third_chunk_data = buffer.get_text_range_mut(chunk_size * 2, 1024).unwrap();
            assert_eq!(third_chunk_data.len(), 1024);
            assert!(third_chunk_data.iter().all(|&b| b == b'C'));

            // A read straddling a chunk boundary must stitch both sides.
            let cross_chunk_offset = chunk_size - 512;
            let cross_chunk_data = buffer.get_text_range_mut(cross_chunk_offset, 1024).unwrap();
            assert_eq!(cross_chunk_data.len(), 1024);
            assert!(cross_chunk_data[..512].iter().all(|&b| b == b'A'));
            assert!(cross_chunk_data[512..].iter().all(|&b| b == b'B'));

            // Loading should have split the file across several buffers.
            assert!(
                buffer.buffers.len() > 1,
                "Expected multiple buffers after chunk-based loading, got {}",
                buffer.buffers.len()
            );

            // Edits still work across the chunked structure.
            buffer.insert_bytes(0, b"PREFIX".to_vec());
            assert_eq!(buffer.total_bytes(), file_size + 6);

            let after_insert = buffer.get_text_range_mut(0, 6).unwrap();
            assert_eq!(after_insert, b"PREFIX");

            let after_prefix = buffer.get_text_range_mut(6, 10).unwrap();
            assert!(after_prefix.iter().all(|&b| b == b'A'));

            // Fresh buffer: stream the whole file in 64 KiB reads and verify
            // every byte against the region it belongs to.
            let mut buffer2 = TextBuffer::load_from_file(&file_path, 1).unwrap();

            let chunk_read_size = 64 * 1024;
            let mut offset = 0;
            while offset < file_size {
                let bytes_to_read = chunk_read_size.min(file_size - offset);
                let chunk_data = buffer2.get_text_range_mut(offset, bytes_to_read).unwrap();

                let first_mb_end = chunk_size;
                let second_mb_end = chunk_size * 2;

                for (i, &byte) in chunk_data.iter().enumerate() {
                    let file_offset = offset + i;
                    let expected = if file_offset < first_mb_end {
                        b'A'
                    } else if file_offset < second_mb_end {
                        b'B'
                    } else {
                        b'C'
                    };
                    assert_eq!(
                        byte, expected,
                        "Mismatch at file offset {}: expected {}, got {}",
                        file_offset, expected as char, byte as char
                    );
                }

                offset += bytes_to_read;
            }
        }

        #[test]
        fn test_large_file_incremental_save() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("large_save_test.txt");

            // Two identifiable regions: A's then B's.
            let chunk_size = 1000;
            let file_size = chunk_size * 2;
            let mut file = File::create(&file_path).unwrap();
            file.write_all(&vec![b'A'; chunk_size]).unwrap();
            file.write_all(&vec![b'B'; chunk_size]).unwrap();
            file.flush().unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 100).unwrap();
            assert!(buffer.large_file);
            assert_eq!(buffer.total_bytes(), file_size);

            // Touch the start so the first region gets loaded...
            let first_bytes = buffer.get_text_range_mut(0, 50).unwrap();
            assert!(first_bytes.iter().all(|&b| b == b'A'));

            // ...then edit it. "PREFIX_" is 7 bytes; the B region is never
            // loaded into memory.
            buffer.insert_bytes(0, b"PREFIX_".to_vec());

            let save_path = temp_dir.path().join("saved.txt");
            buffer.save_to_file(&save_path).unwrap();

            let saved_content = std::fs::read(&save_path).unwrap();

            // Saving must include both the edited region and the region that
            // was never loaded.
            assert_eq!(
                saved_content.len(),
                file_size + 7,
                "Saved file should be {} bytes, got {}",
                file_size + 7,
                saved_content.len()
            );

            assert_eq!(&saved_content[..7], b"PREFIX_", "Should start with PREFIX_");

            assert!(
                saved_content[7..100].iter().all(|&b| b == b'A'),
                "First chunk after prefix should be A's"
            );

            let second_chunk_start = 7 + chunk_size;
            assert!(
                saved_content[second_chunk_start..second_chunk_start + 100]
                    .iter()
                    .all(|&b| b == b'B'),
                "Second chunk should be B's (was unloaded, should be preserved)"
            );
        }

        #[test]
        fn test_large_file_save_with_multiple_edits() {
            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("multi_edit.txt");

            // 100 fixed-width lines so byte offsets are predictable.
            let mut content = Vec::new();
            for i in 0..100 {
                content.extend_from_slice(
                    format!("Line {:04}: padding to make it longer\n", i).as_bytes(),
                );
            }
            let original_len = content.len();
            std::fs::write(&file_path, &content).unwrap();

            let mut buffer = TextBuffer::load_from_file(&file_path, 500).unwrap();
            assert!(
                buffer.line_count().is_none(),
                "Should be in large file mode"
            );

            buffer.insert_bytes(0, b"[START]".to_vec());

            // Force-load the middle region before editing it. The +7 accounts
            // for "[START]" having shifted all subsequent offsets.
            let mid_offset = original_len / 2;
            let _mid_bytes = buffer.get_text_range_mut(mid_offset + 7, 10).unwrap();
            buffer.insert_bytes(mid_offset + 7, b"[MIDDLE]".to_vec());

            let save_path = temp_dir.path().join("multi_edit_saved.txt");
            buffer.save_to_file(&save_path).unwrap();

            let saved = std::fs::read_to_string(&save_path).unwrap();

            assert!(
                saved.starts_with("[START]Line 0000"),
                "Should start with our edit"
            );
            assert!(saved.contains("[MIDDLE]"), "Should contain middle edit");
            assert!(saved.contains("Line 0099"), "Should preserve end of file");

            // "[START]" is 7 bytes, "[MIDDLE]" is 8.
            let expected_len = original_len + 7 + 8;
            assert_eq!(
                saved.len(),
                expected_len,
                "Length should be original + edits"
            );
        }
    }
3688
3689 #[test]
3693 fn test_offset_to_position_simple() {
3694 let content = b"a\nb\nc\nd";
3700 let buffer = TextBuffer::from_bytes(content.to_vec());
3701
3702 let pos = buffer
3704 .offset_to_position(0)
3705 .expect("small buffer should have line metadata");
3706 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3707 assert_eq!(pos.column, 0);
3708
3709 let pos = buffer
3710 .offset_to_position(1)
3711 .expect("small buffer should have line metadata");
3712 assert_eq!(pos.line, 0, "Byte 1 (newline) should be on line 0");
3713 assert_eq!(pos.column, 1);
3714
3715 let pos = buffer
3716 .offset_to_position(2)
3717 .expect("small buffer should have line metadata");
3718 assert_eq!(pos.line, 1, "Byte 2 should be on line 1");
3719 assert_eq!(pos.column, 0);
3720
3721 let pos = buffer
3722 .offset_to_position(3)
3723 .expect("small buffer should have line metadata");
3724 assert_eq!(pos.line, 1, "Byte 3 (newline) should be on line 1");
3725 assert_eq!(pos.column, 1);
3726
3727 let pos = buffer
3728 .offset_to_position(4)
3729 .expect("small buffer should have line metadata");
3730 assert_eq!(pos.line, 2, "Byte 4 should be on line 2");
3731 assert_eq!(pos.column, 0);
3732
3733 let pos = buffer
3734 .offset_to_position(6)
3735 .expect("small buffer should have line metadata");
3736 assert_eq!(pos.line, 3, "Byte 6 should be on line 3");
3737 assert_eq!(pos.column, 0);
3738 }
3739
3740 #[test]
3741 fn test_offset_to_position_after_insert() {
3742 let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec());
3744
3745 buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
3747
3748 let pos = buffer
3754 .offset_to_position(0)
3755 .expect("small buffer should have line metadata");
3756 assert_eq!(pos.line, 0, "Byte 0 should still be on line 0");
3757
3758 let pos = buffer
3759 .offset_to_position(2)
3760 .expect("small buffer should have line metadata");
3761 assert_eq!(
3762 pos.line, 1,
3763 "Byte 2 (start of inserted line) should be on line 1"
3764 );
3765
3766 let pos = buffer
3767 .offset_to_position(4)
3768 .expect("small buffer should have line metadata");
3769 assert_eq!(
3770 pos.line, 2,
3771 "Byte 4 (start of 'b') should be on line 2 after insert"
3772 );
3773 }
3774
3775 #[test]
3776 fn test_offset_to_position_empty_lines() {
3777 let buffer = TextBuffer::from_bytes(b"\n\n\n".to_vec());
3779
3780 let pos = buffer
3786 .offset_to_position(0)
3787 .expect("small buffer should have line metadata");
3788 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3789
3790 let pos = buffer
3791 .offset_to_position(1)
3792 .expect("small buffer should have line metadata");
3793 assert_eq!(pos.line, 1, "Byte 1 should be on line 1");
3794
3795 let pos = buffer
3796 .offset_to_position(2)
3797 .expect("small buffer should have line metadata");
3798 assert_eq!(pos.line, 2, "Byte 2 should be on line 2");
3799
3800 let pos = buffer
3801 .offset_to_position(3)
3802 .expect("small buffer should have line metadata");
3803 assert_eq!(pos.line, 3, "Byte 3 (EOF) should be on line 3");
3804 }
3805
3806 #[test]
3807 fn test_offset_to_position_long_lines() {
3808 let mut content = Vec::new();
3810 content.extend_from_slice(b"aaaaaaaaaa\n"); content.extend_from_slice(b"bbbbbbbbbb\n"); content.extend_from_slice(b"cccccccccc"); let buffer = TextBuffer::from_bytes(content.clone());
3815
3816 let pos = buffer
3818 .offset_to_position(0)
3819 .expect("small buffer should have line metadata");
3820 assert_eq!(pos.line, 0, "Byte 0 should be on line 0");
3821 assert_eq!(pos.column, 0);
3822
3823 let pos = buffer
3824 .offset_to_position(11)
3825 .expect("small buffer should have line metadata");
3826 assert_eq!(pos.line, 1, "Byte 11 (start of line 1) should be on line 1");
3827 assert_eq!(pos.column, 0);
3828
3829 let pos = buffer
3830 .offset_to_position(22)
3831 .expect("small buffer should have line metadata");
3832 assert_eq!(pos.line, 2, "Byte 22 (start of line 2) should be on line 2");
3833 assert_eq!(pos.column, 0);
3834
3835 let pos = buffer
3837 .offset_to_position(5)
3838 .expect("small buffer should have line metadata");
3839 assert_eq!(pos.line, 0, "Byte 5 should be on line 0");
3840 assert_eq!(pos.column, 5);
3841
3842 let pos = buffer
3843 .offset_to_position(16)
3844 .expect("small buffer should have line metadata");
3845 assert_eq!(pos.line, 1, "Byte 16 should be on line 1");
3846 assert_eq!(pos.column, 5);
3847 }
3848
3849 #[test]
3850 fn test_line_iterator_with_offset_to_position() {
3851 let mut buffer = TextBuffer::from_bytes(b"line0\nline1\nline2\n".to_vec());
3853
3854 for byte_pos in 0..=buffer.len() {
3856 let iter = buffer.line_iterator(byte_pos, 80);
3857 let iter_pos = iter.current_position();
3858 let expected_line = buffer
3859 .offset_to_position(byte_pos)
3860 .expect("small buffer should have line metadata")
3861 .line;
3862 let expected_line_start = buffer.position_to_offset(Position {
3863 line: expected_line,
3864 column: 0,
3865 });
3866
3867 assert_eq!(
3868 iter_pos, expected_line_start,
3869 "LineIterator at byte {} should position at line start {} but got {}",
3870 byte_pos, expected_line_start, iter_pos
3871 );
3872 }
3873 }
3874
3875 #[test]
3876 fn test_piece_tree_line_count_after_insert() {
3877 let mut buffer = TextBuffer::from_bytes(b"a\nb\n".to_vec());
3879
3880 buffer.insert_at_position(Position { line: 1, column: 0 }, b"x\n".to_vec());
3882
3883 let content = buffer.slice_bytes(0..buffer.len());
3885 let newline_count = content.iter().filter(|&&b| b == b'\n').count();
3886 let expected_line_count = newline_count + 1;
3887 let actual_line_count = buffer.line_count();
3888
3889 assert_eq!(
3890 actual_line_count,
3891 Some(expected_line_count),
3892 "Line count mismatch after insert"
3893 );
3894 }
3895
    #[test]
    fn test_position_to_lsp_position_after_modification() {
        // Rename `val` -> `value` in both the body and the parameter list,
        // then check byte-offset -> (line, character) mapping stays correct.
        // NOTE(review): the body line is assumed to be indented 4 spaces —
        // that is what makes offset 23 land at line 1, char 4 below.
        let initial = b"fn foo(val: i32) {\n    val + 1\n}\n";
        let mut buffer = TextBuffer::from_bytes(initial.to_vec());

        // Offset 23 points at `val` inside the body (line 1 starts at 19).
        let (line, char) = buffer.position_to_lsp_position(23);
        assert_eq!(line, 1, "Initial: position 23 should be on line 1");
        assert_eq!(char, 4, "Initial: position 23 should be at char 4");

        // Replace `val` with `value` on line 1 (columns 4..7)...
        buffer.delete_range(
            Position { line: 1, column: 4 },
            Position { line: 1, column: 7 },
        );
        buffer.insert_bytes(23, b"value".to_vec());
        // ...and in the parameter list on line 0 (columns 7..10).
        buffer.delete_range(
            Position { line: 0, column: 7 },
            Position {
                line: 0,
                column: 10,
            },
        );
        buffer.insert_bytes(7, b"value".to_vec());

        let content = String::from_utf8_lossy(&buffer.get_all_text().unwrap()).to_string();
        assert_eq!(content, "fn foo(value: i32) {\n    value + 1\n}\n");

        // Line 1 now starts at offset 21, so offset 25 is 4 characters in.
        let (line, char) = buffer.position_to_lsp_position(25);
        assert_eq!(
            line, 1,
            "After modification: position 25 should be on line 1"
        );
        assert_eq!(
            char, 4,
            "After modification: position 25 should be at char 4"
        );

        let (line, char) = buffer.position_to_lsp_position(21);
        assert_eq!(line, 1, "Position 21 should be on line 1");
        assert_eq!(char, 0, "Position 21 should be at char 0 (start of line)");
    }
3957
3958 #[test]
3959 fn test_detect_crlf() {
3960 assert_eq!(
3961 TextBuffer::detect_line_ending(b"hello\r\nworld\r\n"),
3962 LineEnding::CRLF
3963 );
3964 }
3965
3966 #[test]
3967 fn test_detect_lf() {
3968 assert_eq!(
3969 TextBuffer::detect_line_ending(b"hello\nworld\n"),
3970 LineEnding::LF
3971 );
3972 }
3973
3974 #[test]
3975 fn test_normalize_crlf() {
3976 let input = b"hello\r\nworld\r\n".to_vec();
3977 let output = TextBuffer::normalize_line_endings(input);
3978 assert_eq!(output, b"hello\nworld\n");
3979 }
3980
3981 #[test]
3982 fn test_normalize_empty() {
3983 let input = Vec::new();
3984 let output = TextBuffer::normalize_line_endings(input);
3985 assert_eq!(output, Vec::<u8>::new());
3986 }
3987
    #[test]
    fn test_get_all_text_returns_empty_for_unloaded_buffers() {
        use tempfile::TempDir;
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("large_test.txt");

        // A 50 KB file against a 1 KB threshold forces large-file mode.
        let original_content = "X".repeat(50_000);
        std::fs::write(&file_path, &original_content).unwrap();

        let mut buffer = TextBuffer::load_from_file(&file_path, 1024).unwrap();
        assert!(buffer.large_file, "Should be in large file mode");
        assert!(!buffer.buffers[0].is_loaded(), "Buffer should be unloaded");

        // "EDITED: " is 8 bytes — accounted for in the length check below.
        buffer.insert_bytes(0, b"EDITED: ".to_vec());

        // The immutable accessor must refuse to answer (None) rather than
        // return partial text while regions are still unloaded.
        let content_immutable = buffer.get_all_text();

        assert!(
            content_immutable.is_none(),
            "get_all_text() should return None for large files with unloaded regions. \
            Got Some({} bytes) instead of None.",
            content_immutable.as_ref().map(|c| c.len()).unwrap_or(0)
        );

        // The mutable accessor loads on demand and returns everything.
        let total = buffer.total_bytes();
        let content_lazy = buffer.get_text_range_mut(0, total).unwrap();
        assert_eq!(
            content_lazy.len(),
            50_000 + 8,
            "get_text_range_mut() should return all content with lazy loading"
        );
        assert!(
            String::from_utf8_lossy(&content_lazy).starts_with("EDITED: "),
            "Content should start with our edit"
        );
    }
4038
    /// Tests for converting between LF / CRLF / CR line endings, for the save
    /// path that applies the configured ending, and for permission failures
    /// that surface a `SudoSaveRequired` error.
    mod line_ending_conversion {
        use super::*;

        #[test]
        fn test_convert_lf_to_crlf() {
            let input = b"Line 1\nLine 2\nLine 3\n";
            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
        }

        #[test]
        fn test_convert_crlf_to_lf() {
            let input = b"Line 1\r\nLine 2\r\nLine 3\r\n";
            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
        }

        #[test]
        fn test_convert_cr_to_lf() {
            // Classic-Mac style bare-CR endings.
            let input = b"Line 1\rLine 2\rLine 3\r";
            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
            assert_eq!(result, b"Line 1\nLine 2\nLine 3\n");
        }

        #[test]
        fn test_convert_mixed_to_crlf() {
            // Mixed LF / CRLF / CR input normalizes to a single style.
            let input = b"Line 1\nLine 2\r\nLine 3\r";
            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
            assert_eq!(result, b"Line 1\r\nLine 2\r\nLine 3\r\n");
        }

        #[test]
        fn test_convert_lf_to_lf_is_noop() {
            let input = b"Line 1\nLine 2\nLine 3\n";
            let result = TextBuffer::convert_line_endings_to(input, LineEnding::LF);
            assert_eq!(result, input.to_vec());
        }

        #[test]
        fn test_convert_empty_content() {
            let input = b"";
            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
            assert_eq!(result, b"".to_vec());
        }

        #[test]
        fn test_convert_no_line_endings() {
            // Content without terminators passes through untouched.
            let input = b"No line endings here";
            let result = TextBuffer::convert_line_endings_to(input, LineEnding::CRLF);
            assert_eq!(result, b"No line endings here".to_vec());
        }

        #[test]
        fn test_set_line_ending_marks_modified() {
            let mut buffer = TextBuffer::from_bytes(b"Hello\nWorld\n".to_vec());
            assert!(!buffer.is_modified());

            // Changing the ending is a user-visible edit: buffer goes dirty.
            buffer.set_line_ending(LineEnding::CRLF);
            assert!(buffer.is_modified());
        }

        #[test]
        fn test_set_default_line_ending_does_not_mark_modified() {
            let mut buffer = TextBuffer::empty();
            assert!(!buffer.is_modified());

            // Setting the default is configuration, not an edit.
            buffer.set_default_line_ending(LineEnding::CRLF);
            assert!(!buffer.is_modified());
            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
        }

        #[test]
        fn test_save_to_file_converts_lf_to_crlf() {
            use tempfile::TempDir;

            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test_lf_to_crlf.txt");

            let original_content = b"Line 1\nLine 2\nLine 3\n";
            std::fs::write(&file_path, original_content).unwrap();

            // Loading detects LF from the file content.
            let mut buffer =
                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD).unwrap();
            assert_eq!(buffer.line_ending(), LineEnding::LF);

            buffer.set_line_ending(LineEnding::CRLF);
            assert_eq!(buffer.line_ending(), LineEnding::CRLF);
            assert!(buffer.is_modified());

            // Saving writes the content using the newly selected ending.
            buffer.save_to_file(&file_path).unwrap();

            let saved_bytes = std::fs::read(&file_path).unwrap();
            assert_eq!(&saved_bytes, b"Line 1\r\nLine 2\r\nLine 3\r\n");
        }

        #[test]
        fn test_save_to_file_converts_crlf_to_lf() {
            use tempfile::TempDir;

            let temp_dir = TempDir::new().unwrap();
            let file_path = temp_dir.path().join("test_crlf_to_lf.txt");

            let original_content = b"Line 1\r\nLine 2\r\nLine 3\r\n";
            std::fs::write(&file_path, original_content).unwrap();

            // Loading detects CRLF from the file content.
            let mut buffer =
                TextBuffer::load_from_file(&file_path, DEFAULT_LARGE_FILE_THRESHOLD).unwrap();
            assert_eq!(buffer.line_ending(), LineEnding::CRLF);

            buffer.set_line_ending(LineEnding::LF);
            assert_eq!(buffer.line_ending(), LineEnding::LF);
            assert!(buffer.is_modified());

            buffer.save_to_file(&file_path).unwrap();

            let saved_bytes = std::fs::read(&file_path).unwrap();
            assert_eq!(&saved_bytes, b"Line 1\nLine 2\nLine 3\n");
        }

        #[test]
        #[cfg(unix)]
        fn test_save_to_unwritable_file() -> anyhow::Result<()> {
            use std::fs::Permissions;
            use std::os::unix::fs::PermissionsExt;
            use tempfile::TempDir;

            let temp_dir = TempDir::new().unwrap();
            let unwritable_dir = temp_dir.path().join("unwritable_dir");
            std::fs::create_dir(&unwritable_dir)?;

            let file_path = unwritable_dir.join("unwritable.txt");
            std::fs::write(&file_path, "original content")?;

            // r-x only (0o555): writing into this directory must fail.
            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;

            let mut buffer = TextBuffer::from_bytes(b"new content".to_vec());
            let result = buffer.save_to_file(&file_path);

            match result {
                Err(e) => {
                    // The failed save should surface SudoSaveRequired carrying
                    // a temp file with the content, ready for escalation.
                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
                        assert_eq!(sudo_err.dest_path, file_path);
                        assert!(sudo_err.temp_path.exists());
                        // Clean up the temp file the failed save left behind.
                        let _ = std::fs::remove_file(&sudo_err.temp_path);
                    } else {
                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
                    }
                }
                Ok(_) => panic!("Expected error, but save succeeded"),
            }

            Ok(())
        }

        #[test]
        #[cfg(unix)]
        fn test_save_to_unwritable_directory() -> anyhow::Result<()> {
            use std::fs::Permissions;
            use std::os::unix::fs::PermissionsExt;
            use tempfile::TempDir;

            let temp_dir = TempDir::new().unwrap();
            let unwritable_dir = temp_dir.path().join("unwritable_dir");
            std::fs::create_dir(&unwritable_dir)?;

            // Unlike the test above, the destination file does not exist yet.
            let file_path = unwritable_dir.join("test.txt");

            std::fs::set_permissions(&unwritable_dir, Permissions::from_mode(0o555))?;

            let mut buffer = TextBuffer::from_bytes(b"content".to_vec());
            let result = buffer.save_to_file(&file_path);

            match result {
                Err(e) => {
                    if let Some(sudo_err) = e.downcast_ref::<SudoSaveRequired>() {
                        assert_eq!(sudo_err.dest_path, file_path);
                        assert!(sudo_err.temp_path.exists());
                        // With the target directory unwritable, the temp file
                        // must live in the system temp directory instead.
                        assert!(sudo_err.temp_path.starts_with(std::env::temp_dir()));
                        let _ = std::fs::remove_file(&sudo_err.temp_path);
                    } else {
                        panic!("Expected SudoSaveRequired error, got: {:?}", e);
                    }
                }
                Ok(_) => panic!("Expected error, but save succeeded"),
            }

            Ok(())
        }
    }
4247}
4248
4249#[cfg(test)]
4250mod property_tests {
4251 use super::*;
4252 use proptest::prelude::*;
4253
    /// Proptest strategy: up to 100 bytes drawn from lowercase ASCII letters
    /// and `\n`, so generated inputs exercise line-boundary bookkeeping.
    fn text_with_newlines() -> impl Strategy<Value = Vec<u8>> {
        prop::collection::vec(
            prop_oneof![(b'a'..=b'z').prop_map(|c| c), Just(b'\n'),],
            0..100,
        )
    }
4261
    /// A single randomly generated edit applied during property tests.
    #[derive(Debug, Clone)]
    enum Operation {
        /// Insert `text` at byte `offset` (clamped by the test driver).
        Insert { offset: usize, text: Vec<u8> },
        /// Delete `bytes` bytes starting at byte `offset`.
        Delete { offset: usize, bytes: usize },
    }
4268
    /// Strategy producing up to 50 random insert/delete operations. Offsets
    /// range over 0..200 and are clamped to the buffer length when applied.
    fn operation_strategy() -> impl Strategy<Value = Vec<Operation>> {
        prop::collection::vec(
            prop_oneof![
                (0usize..200, text_with_newlines())
                    .prop_map(|(offset, text)| { Operation::Insert { offset, text } }),
                (0usize..200, 1usize..50)
                    .prop_map(|(offset, bytes)| { Operation::Delete { offset, bytes } }),
            ],
            0..50,
        )
    }
4280
4281 proptest! {
        // Invariant: line count always equals newline count + 1.
        #[test]
        fn prop_line_count_consistent(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone());

            let newline_count = text.iter().filter(|&&b| b == b'\n').count();
            prop_assert_eq!(buffer.line_count(), Some(newline_count + 1));
        }
4289
        // Invariant: round-tripping bytes through a buffer is lossless.
        #[test]
        fn prop_get_all_text_matches_original(text in text_with_newlines()) {
            let buffer = TextBuffer::from_bytes(text.clone());
            prop_assert_eq!(buffer.get_all_text().unwrap(), text);
        }
4295
        // Invariant: inserting n bytes grows the buffer by exactly n.
        #[test]
        fn prop_insert_increases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            insert_text in text_with_newlines()
        ) {
            let mut buffer = TextBuffer::from_bytes(text);
            let initial_bytes = buffer.total_bytes();

            // Clamp the random offset into the valid insertion range.
            let offset = offset.min(buffer.total_bytes());
            buffer.insert_bytes(offset, insert_text.clone());

            prop_assert_eq!(buffer.total_bytes(), initial_bytes + insert_text.len());
        }
4310
        // Invariant: deleting n bytes shrinks the buffer by exactly n.
        #[test]
        fn prop_delete_decreases_size(
            text in text_with_newlines(),
            offset in 0usize..100,
            delete_bytes in 1usize..50
        ) {
            // Nothing to delete from an empty buffer — skip the case.
            if text.is_empty() {
                return Ok(());
            }

            let mut buffer = TextBuffer::from_bytes(text);
            let initial_bytes = buffer.total_bytes();

            // Clamp offset and length so the deletion stays in bounds.
            let offset = offset.min(buffer.total_bytes());
            let delete_bytes = delete_bytes.min(buffer.total_bytes() - offset);

            // Clamping may have reduced the deletion to zero bytes — skip.
            if delete_bytes == 0 {
                return Ok(());
            }

            buffer.delete_bytes(offset, delete_bytes);

            prop_assert_eq!(buffer.total_bytes(), initial_bytes - delete_bytes);
        }
4335
4336 #[test]
4337 fn prop_insert_then_delete_restores_original(
4338 text in text_with_newlines(),
4339 offset in 0usize..100,
4340 insert_text in text_with_newlines()
4341 ) {
4342 let mut buffer = TextBuffer::from_bytes(text.clone());
4343
4344 let offset = offset.min(buffer.total_bytes());
4345 buffer.insert_bytes(offset, insert_text.clone());
4346 buffer.delete_bytes(offset, insert_text.len());
4347
4348 prop_assert_eq!(buffer.get_all_text().unwrap(), text);
4349 }
4350
4351 #[test]
4352 fn prop_offset_position_roundtrip(text in text_with_newlines()) {
4353 let buffer = TextBuffer::from_bytes(text.clone());
4354
4355 for offset in 0..text.len() {
4356 let pos = buffer.offset_to_position(offset).expect("offset_to_position should succeed for valid offset");
4357 let back = buffer.position_to_offset(pos);
4358 prop_assert_eq!(back, offset, "Failed roundtrip for offset {}", offset);
4359 }
4360 }
4361
4362 #[test]
4363 fn prop_get_text_range_valid(
4364 text in text_with_newlines(),
4365 offset in 0usize..100,
4366 length in 1usize..50
4367 ) {
4368 if text.is_empty() {
4369 return Ok(());
4370 }
4371
4372 let buffer = TextBuffer::from_bytes(text.clone());
4373 let offset = offset.min(buffer.total_bytes());
4374 let length = length.min(buffer.total_bytes() - offset);
4375
4376 if length == 0 {
4377 return Ok(());
4378 }
4379
4380 let result = buffer.get_text_range(offset, length);
4381 prop_assert_eq!(result, Some(text[offset..offset + length].to_vec()));
4382 }
4383
4384 #[test]
4385 fn prop_operations_maintain_consistency(operations in operation_strategy()) {
4386 let mut buffer = TextBuffer::from_bytes(b"initial\ntext".to_vec());
4387 let mut expected_text = b"initial\ntext".to_vec();
4388
4389 for op in operations {
4390 match op {
4391 Operation::Insert { offset, text } => {
4392 let offset = offset.min(buffer.total_bytes());
4393 buffer.insert_bytes(offset, text.clone());
4394
4395 let offset = offset.min(expected_text.len());
4397 expected_text.splice(offset..offset, text);
4398 }
4399 Operation::Delete { offset, bytes } => {
4400 if offset < buffer.total_bytes() {
4401 let bytes = bytes.min(buffer.total_bytes() - offset);
4402 buffer.delete_bytes(offset, bytes);
4403
4404 if offset < expected_text.len() {
4406 let bytes = bytes.min(expected_text.len() - offset);
4407 expected_text.drain(offset..offset + bytes);
4408 }
4409 }
4410 }
4411 }
4412 }
4413
4414 prop_assert_eq!(buffer.get_all_text().unwrap(), expected_text);
4415 }
4416
4417 #[test]
4418 fn prop_line_count_never_zero(operations in operation_strategy()) {
4419 let mut buffer = TextBuffer::from_bytes(b"test".to_vec());
4420
4421 for op in operations {
4422 match op {
4423 Operation::Insert { offset, text } => {
4424 let offset = offset.min(buffer.total_bytes());
4425 buffer.insert_bytes(offset, text);
4426 }
4427 Operation::Delete { offset, bytes } => {
4428 buffer.delete_bytes(offset, bytes);
4429 }
4430 }
4431
4432 prop_assert!(buffer.line_count().unwrap_or(1) >= 1);
4434 }
4435 }
4436
4437 #[test]
4438 fn prop_total_bytes_never_negative(operations in operation_strategy()) {
4439 let mut buffer = TextBuffer::from_bytes(b"test".to_vec());
4440
4441 for op in operations {
4442 match op {
4443 Operation::Insert { offset, text } => {
4444 let offset = offset.min(buffer.total_bytes());
4445 buffer.insert_bytes(offset, text);
4446 }
4447 Operation::Delete { offset, bytes } => {
4448 buffer.delete_bytes(offset, bytes);
4449 }
4450 }
4451
4452 prop_assert!(buffer.total_bytes() < 10_000_000);
4454 }
4455 }
4456
4457 #[test]
4458 fn prop_piece_tree_and_line_index_stay_synced(operations in operation_strategy()) {
4459 let mut buffer = TextBuffer::from_bytes(b"line1\nline2\nline3".to_vec());
4460
4461 for op in operations {
4462 match op {
4463 Operation::Insert { offset, text } => {
4464 let offset = offset.min(buffer.total_bytes());
4465 buffer.insert_bytes(offset, text);
4466 }
4467 Operation::Delete { offset, bytes } => {
4468 buffer.delete_bytes(offset, bytes);
4469 }
4470 }
4471
4472 if buffer.total_bytes() > 0 {
4474 let mid_offset = buffer.total_bytes() / 2;
4475 if let Some(pos) = buffer.offset_to_position(mid_offset) {
4476 let back = buffer.position_to_offset(pos);
4477
4478 prop_assert!(back <= buffer.total_bytes());
4480 }
4481 }
4482 }
4483 }
4484 }
4485
4486 #[test]
4487 fn test_detect_binary_text_files() {
4488 assert!(!TextBuffer::detect_binary(b"Hello, world!"));
4490 assert!(!TextBuffer::detect_binary(b"Line 1\nLine 2\nLine 3"));
4491 assert!(!TextBuffer::detect_binary(b"Tabs\tand\tnewlines\n"));
4492 assert!(!TextBuffer::detect_binary(b"Carriage return\r\n"));
4493
4494 assert!(!TextBuffer::detect_binary(b""));
4496
4497 assert!(!TextBuffer::detect_binary(b"\x1b[31mRed text\x1b[0m"));
4499 }
4500
4501 #[test]
4502 fn test_detect_binary_binary_files() {
4503 assert!(TextBuffer::detect_binary(b"Hello\x00World"));
4505 assert!(TextBuffer::detect_binary(b"\x00"));
4506
4507 assert!(TextBuffer::detect_binary(b"Text with \x01 control char"));
4509 assert!(TextBuffer::detect_binary(b"\x02\x03\x04"));
4510
4511 assert!(TextBuffer::detect_binary(b"Text with DEL\x7F"));
4513 }
4514
4515 #[test]
4516 fn test_detect_binary_png_file() {
4517 let png_header: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
4520 assert!(TextBuffer::detect_binary(png_header));
4521
4522 let mut png_data = vec![0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
4524 png_data.extend_from_slice(b"\x00\x00\x00\x0DIHDR"); assert!(TextBuffer::detect_binary(&png_data));
4526 }
4527
4528 #[test]
4529 fn test_detect_binary_other_image_formats() {
4530 let jpeg_header: &[u8] = &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10];
4532 assert!(TextBuffer::detect_binary(jpeg_header));
4533
4534 let gif_data: &[u8] = &[
4537 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, ];
4543 assert!(TextBuffer::detect_binary(gif_data));
4545
4546 let bmp_header: &[u8] = &[0x42, 0x4D, 0x00, 0x00, 0x00, 0x00];
4548 assert!(TextBuffer::detect_binary(bmp_header));
4549 }
4550
4551 #[test]
4552 fn test_detect_binary_executable_formats() {
4553 let elf_header: &[u8] = &[0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00];
4555 assert!(TextBuffer::detect_binary(elf_header));
4556
4557 let macho_header: &[u8] = &[0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
4559 assert!(TextBuffer::detect_binary(macho_header));
4560
4561 let pe_header: &[u8] = &[0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00];
4563 assert!(TextBuffer::detect_binary(pe_header));
4564 }
4565}
4566
/// A single line extracted from a `TextBuffer`, with enough metadata to map
/// it back to its position in the underlying buffer.
#[derive(Debug, Clone)]
pub struct LineData {
    /// Byte offset of the start of this line within the buffer.
    pub byte_offset: usize,
    /// Line text with any trailing `\n` stripped (invalid UTF-8 is decoded
    /// lossily by the iterator that builds this value).
    pub content: String,
    /// Whether the line was terminated by a `\n` in the buffer (false for a
    /// final line without a trailing newline, or a truncated long line).
    pub has_newline: bool,
    /// Line number from the buffer's position index, `None` when the buffer
    /// has no line metadata. Indexing base follows `Position::line` — TODO
    /// confirm whether that is 0- or 1-based.
    pub line_number: Option<usize>,
}
4579
/// Iterator over buffer lines that are materialized eagerly at construction
/// time (see `TextBufferLineIterator::new`); iteration itself never touches
/// the buffer again.
pub struct TextBufferLineIterator {
    /// Lines collected up-front by `new` (at most `max_lines` entries).
    lines: Vec<LineData>,
    /// Index of the next line to yield from `lines`.
    current_index: usize,
    /// True when the buffer still contained data past the last collected line.
    pub has_more: bool,
}
4590
4591impl TextBufferLineIterator {
4592 pub(crate) fn new(buffer: &mut TextBuffer, byte_pos: usize, max_lines: usize) -> Result<Self> {
4593 let buffer_len = buffer.len();
4594 if byte_pos >= buffer_len {
4595 return Ok(Self {
4596 lines: Vec::new(),
4597 current_index: 0,
4598 has_more: false,
4599 });
4600 }
4601
4602 let has_line_metadata = buffer.line_count().is_some();
4604
4605 let mut current_line = if has_line_metadata {
4608 buffer.offset_to_position(byte_pos).map(|pos| pos.line)
4609 } else {
4610 None
4611 };
4612
4613 let mut lines = Vec::with_capacity(max_lines);
4614 let mut current_offset = byte_pos;
4615 let estimated_line_length = 80; for _ in 0..max_lines {
4619 if current_offset >= buffer_len {
4620 break;
4621 }
4622
4623 let line_start = current_offset;
4624 let line_number = current_line;
4625
4626 let estimated_max_line_length = estimated_line_length * 3;
4628 let bytes_to_scan = estimated_max_line_length.min(buffer_len - current_offset);
4629
4630 let chunk = buffer.get_text_range_mut(current_offset, bytes_to_scan)?;
4632
4633 let mut line_len = 0;
4635 let mut found_newline = false;
4636 for &byte in chunk.iter() {
4637 line_len += 1;
4638 if byte == b'\n' {
4639 found_newline = true;
4640 break;
4641 }
4642 }
4643
4644 if !found_newline && current_offset + line_len < buffer_len {
4646 let remaining = buffer_len - current_offset - line_len;
4648 let additional_bytes = estimated_max_line_length.min(remaining);
4649 let more_chunk =
4650 buffer.get_text_range_mut(current_offset + line_len, additional_bytes)?;
4651
4652 let mut extended_chunk = chunk;
4653 extended_chunk.extend_from_slice(&more_chunk);
4654
4655 for &byte in more_chunk.iter() {
4656 line_len += 1;
4657 if byte == b'\n' {
4658 found_newline = true;
4659 break;
4660 }
4661 }
4662
4663 let line_string = String::from_utf8_lossy(&extended_chunk[..line_len]).into_owned();
4664 let has_newline = line_string.ends_with('\n');
4665 let content = if has_newline {
4666 line_string[..line_string.len() - 1].to_string()
4667 } else {
4668 line_string
4669 };
4670
4671 lines.push(LineData {
4672 byte_offset: line_start,
4673 content,
4674 has_newline,
4675 line_number,
4676 });
4677
4678 current_offset += line_len;
4679 if has_line_metadata && found_newline {
4680 current_line = current_line.map(|n| n + 1);
4681 }
4682 continue;
4683 }
4684
4685 let line_string = String::from_utf8_lossy(&chunk[..line_len]).into_owned();
4687 let has_newline = line_string.ends_with('\n');
4688 let content = if has_newline {
4689 line_string[..line_string.len() - 1].to_string()
4690 } else {
4691 line_string
4692 };
4693
4694 lines.push(LineData {
4695 byte_offset: line_start,
4696 content,
4697 has_newline,
4698 line_number,
4699 });
4700
4701 current_offset += line_len;
4702 if has_line_metadata && found_newline {
4704 current_line = current_line.map(|n| n + 1);
4705 }
4706 }
4707
4708 let has_more = current_offset < buffer_len;
4710
4711 Ok(Self {
4712 lines,
4713 current_index: 0,
4714 has_more,
4715 })
4716 }
4717}
4718
4719impl Iterator for TextBufferLineIterator {
4720 type Item = LineData;
4721
4722 fn next(&mut self) -> Option<Self::Item> {
4723 if self.current_index < self.lines.len() {
4724 let line = self.lines[self.current_index].clone();
4725 self.current_index += 1;
4726 Some(line)
4727 } else {
4728 None
4729 }
4730 }
4731}