1use std::{
2 iter::FusedIterator,
3 ops::{Range, RangeBounds},
4 str::Utf8Error,
5 sync::LazyLock,
6};
7
8use gapbuf::GapBuffer;
9use lender::{DoubleEndedLender, ExactSizeLender, Lender, Lending};
10
11use super::{Point, RegexPattern, TextRange, records::Records};
12use crate::{opts::PrintOpts, text::TextIndex};
13
14static EMPTY_BYTES: LazyLock<Bytes> = LazyLock::new(Bytes::default);
15
16#[derive(Default, Clone)]
20pub struct Bytes {
21 buf: GapBuffer<u8>,
22 records: Records,
23}
24
25impl Bytes {
26 #[doc(hidden)]
30 pub(crate) fn new(string: &str) -> Self {
31 let buf = GapBuffer::from_iter(string.bytes());
32
33 let len = buf.len();
34 let chars = string.chars().count();
35 let lines = buf.iter().filter(|b| **b == b'\n').count();
36 Self {
37 buf,
38 records: Records::new([len, chars, lines]),
39 }
40 }
41
42 pub fn len(&self) -> Point {
46 let [b, c, l] = self.records.max();
47 Point::from_raw(b, c, l)
48 }
49
50 pub fn is_empty(&self) -> bool {
65 let (s0, s1) = self.buf.as_slices();
66 (s0 == b"\n" && s1 == b"") || (s0 == b"" && s1 == b"\n")
67 }
68
69 pub fn char_at(&self, p: impl TextIndex) -> Option<char> {
71 if p.to_byte_index() >= self.len().byte() {
72 return None;
73 }
74
75 let [s0, s1] = self.strs_inner(..).unwrap();
76 Some(if p.to_byte_index() < s0.len() {
77 s0[p.to_byte_index()..].chars().next().unwrap()
78 } else {
79 s1[p.to_byte_index() - s0.len()..]
80 .chars()
81 .next()
82 .unwrap_or_else(|| panic!("{self:#?}"))
83 })
84 }
85
86 #[track_caller]
102 pub fn slices(&self, range: impl TextRange) -> Slices<'_> {
103 let (s0, s1) = self
104 .buf
105 .range(range.to_range(self.len().byte()))
106 .as_slices();
107 Slices([s0.iter(), s1.iter()])
108 }
109
110 #[track_caller]
153 pub fn strs(&self, range: impl TextRange) -> Option<Strs<'_>> {
154 let range = range.to_range(self.len().byte());
155 Some(Strs {
156 bytes: self,
157 range: range.clone(),
158 arr: self.strs_inner(range)?,
159 fwd: 0,
160 rev: 2,
161 })
162 }
163
164 #[track_caller]
171 pub fn lines(&self, range: impl TextRange) -> Lines<'_> {
172 let range = range.to_range(self.len().byte());
173 let start = self.point_at_line(self.point_at_byte(range.start).line());
174 let end = {
175 let end = self.point_at_byte(range.end);
176 let line_start = self.point_at_line(end.line());
177 match line_start == end {
178 true => end,
179 false => self.point_at_line((end.line() + 1).min(self.len().line())),
180 }
181 };
182
183 let (fwd_i, rev_i) = (start.line(), end.line());
186 if let Some(str) = self.get_contiguous(start..end) {
187 let lines = [str.lines(), "".lines()];
188 Lines::new(lines, None, fwd_i, rev_i)
189 } else if end.byte() > start.byte() && self.buf[self.buf.gap() - 1] == b'\n' {
192 let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
193 let lines = [s0.lines(), s1.lines()];
194 Lines::new(lines, None, fwd_i, rev_i)
195 } else {
198 let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
199
200 let (before, split0) = match s0.rsplit_once('\n') {
201 Some((before, split)) => (before, split),
202 None => ("", s0),
203 };
204 let (after, split1) = match s1.split_once('\n') {
205 Some((after, split)) => (after, split),
206 None => ("", s1),
207 };
208
209 let lines = [before.lines(), after.lines()];
210 let split_line = Some(split0.to_string() + split1);
211 Lines::new(lines, split_line, fwd_i, rev_i)
212 }
213 }
214
215 #[track_caller]
217 fn strs_inner(&self, range: impl RangeBounds<usize>) -> Option<[&str; 2]> {
218 let (start, end) = crate::utils::get_ends(range, self.len().byte());
219 use std::str::from_utf8_unchecked;
220
221 let (s0, s1) = self.buf.as_slices();
222
223 if s0.first().is_some_and(|b| utf8_char_width(*b) == 0)
225 || s1.first().is_some_and(|b| utf8_char_width(*b) == 0)
226 || self.buf.get(end).is_some_and(|b| utf8_char_width(*b) == 0)
227 {
228 return None;
229 }
230
231 Some(unsafe {
232 let r0 = start.min(s0.len())..end.min(s0.len());
233 let r1 = start.saturating_sub(s0.len()).min(s1.len())
234 ..end.saturating_sub(s0.len()).min(s1.len());
235
236 [from_utf8_unchecked(&s0[r0]), from_utf8_unchecked(&s1[r1])]
237 })
238 }
239
240 #[inline(always)]
251 #[track_caller]
252 pub fn point_at_byte(&self, b: usize) -> Point {
253 assert!(
254 b <= self.len().byte(),
255 "byte out of bounds: the len is {}, but the byte is {b}",
256 self.len().byte()
257 );
258
259 let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(b, |[b, ..]| b);
260
261 let found = if b >= c_b {
262 let [s0, s1] = self.strs_inner(c_b..).unwrap();
263
264 s0.char_indices()
265 .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
266 .enumerate()
267 .map(|(i, (this_b, char))| {
268 c_l += (char == '\n') as usize;
269 (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
270 })
271 .take_while(|&(rhs, ..)| b >= rhs)
272 .last()
273 } else {
274 let mut c_len = 0;
275 self.strs_inner(..c_b)
276 .unwrap()
277 .into_iter()
278 .flat_map(str::chars)
279 .rev()
280 .enumerate()
281 .map(|(i, char)| {
282 c_l -= (char == '\n') as usize;
283 c_len += char.len_utf8();
284 (c_b - c_len, c_c - (i + 1), c_l)
285 })
286 .take_while(|&(rhs, ..)| b <= rhs)
287 .last()
288 };
289
290 found
291 .map(|(b, c, l)| Point::from_raw(b, c, l))
292 .unwrap_or(self.len())
293 }
294
295 #[inline(always)]
302 #[track_caller]
303 pub fn point_at_char(&self, c: usize) -> Point {
304 assert!(
305 c <= self.len().char(),
306 "char out of bounds: the len is {}, but the char is {c}",
307 self.len().char()
308 );
309
310 let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(c, |[_, c, _]| c);
311
312 let found = if c >= c_c {
313 let [s0, s1] = self.strs_inner(c_b..).unwrap();
314
315 s0.char_indices()
316 .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
317 .enumerate()
318 .map(|(i, (this_b, char))| {
319 c_l += (char == '\n') as usize;
320 (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
321 })
322 .take_while(|&(_, rhs, _)| c >= rhs)
323 .last()
324 } else {
325 let mut c_len = 0;
326 self.strs_inner(..c_b)
327 .unwrap()
328 .into_iter()
329 .flat_map(str::chars)
330 .rev()
331 .enumerate()
332 .map(|(i, char)| {
333 c_l -= (char == '\n') as usize;
334 c_len += char.len_utf8();
335 (c_b - c_len, c_c - (i + 1), c_l)
336 })
337 .take_while(|&(_, rhs, _)| c <= rhs)
338 .last()
339 };
340
341 found
342 .map(|(b, c, l)| Point::from_raw(b, c, l))
343 .unwrap_or(self.len())
344 }
345
346 #[inline(always)]
356 #[track_caller]
357 pub fn point_at_line(&self, l: usize) -> Point {
358 assert!(
359 l <= self.len().line(),
360 "line out of bounds: the len is {}, but the line is {l}",
361 self.len().line()
362 );
363
364 let (c_b, c_c, mut c_l) = {
365 let [b, c, l] = self.records.closest_to_by_key(l, |[.., l]| l);
366 let (b, c) = self
367 .strs_inner(..b)
368 .unwrap()
369 .into_iter()
370 .flat_map(str::chars)
371 .rev()
372 .take_while(|c| *c != '\n')
373 .fold((b, c), |(b, c), char| (b - char.len_utf8(), c - 1));
374 (b, c, l)
375 };
376
377 let found = if l >= c_l {
378 let [s0, s1] = self.strs_inner(c_b..).unwrap();
379
380 s0.char_indices()
381 .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
382 .enumerate()
383 .map(|(i, (this_b, char))| {
384 c_l += (char == '\n') as usize;
385 (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
386 })
387 .find(|&(.., rhs)| l == rhs)
388 } else {
389 let mut c_len = 0;
390 self.strs_inner(..c_b)
391 .unwrap()
392 .into_iter()
393 .flat_map(str::chars)
394 .rev()
395 .enumerate()
396 .map(|(i, char)| {
397 c_l -= (char == '\n') as usize;
398 c_len += char.len_utf8();
399 (c_b - c_len, c_c - (i + 1), c_l)
400 })
401 .take_while(|&(.., rhs)| l <= rhs)
402 .last()
403 };
404
405 found
406 .map(|(b, c, l)| Point::from_raw(b, c, l))
407 .unwrap_or(self.len())
408 }
409
410 #[inline(always)]
421 #[track_caller]
422 pub fn line_range(&self, l: usize) -> Range<Point> {
423 assert!(
424 l <= self.len().line(),
425 "line out of bounds: the len is {}, but the line is {l}",
426 self.len().line()
427 );
428
429 let start = self.point_at_line(l);
430 let end = self
431 .chars_fwd(start..)
432 .unwrap()
433 .find_map(|(p, _)| (p.line() > start.line()).then_some(p))
434 .unwrap_or(self.len());
435 start..end
436 }
437
438 pub fn last_point(&self) -> Point {
447 self.len().rev('\n')
448 }
449
450 #[track_caller]
456 pub fn chars_fwd(
457 &self,
458 range: impl TextRange,
459 ) -> Option<impl Iterator<Item = (Point, char)> + '_> {
460 let range = range.to_range(self.len().byte());
461 let p = self.point_at_byte(range.start);
462 Some(self.strs(range)?.chars().scan(p, |p, char| {
463 let old_p = *p;
464 *p = p.fwd(char);
465 Some((old_p, char))
466 }))
467 }
468
469 #[track_caller]
475 pub fn chars_rev(
476 &self,
477 range: impl TextRange,
478 ) -> Option<impl Iterator<Item = (Point, char)> + '_> {
479 let range = range.to_range(self.len().byte());
480 let p = self.point_at_byte(range.end);
481 Some(self.strs(range)?.chars().rev().scan(p, |p, char| {
482 *p = p.rev(char);
483 Some((*p, char))
484 }))
485 }
486
487 pub fn indent(&self, p: Point, opts: PrintOpts) -> usize {
489 let range = self.line_range(p.line());
490 self.chars_fwd(range.start..)
491 .unwrap()
492 .map_while(|(_, c)| match c {
493 ' ' => Some(1),
494 '\t' => Some(opts.tabstop as usize),
495 _ => None,
496 })
497 .sum()
498 }
499
500 pub(crate) fn apply_change(&mut self, change: super::Change<&str>) {
506 let edit = change.added_str();
507 let start = change.start();
508
509 let range = start.byte()..change.taken_end().byte();
510 self.buf.splice(range, edit.bytes());
511
512 let start_rec = [start.byte(), start.char(), start.line()];
513 let old_len = [
514 change.taken_end().byte() - start.byte(),
515 change.taken_end().char() - start.char(),
516 change.taken_end().line() - start.line(),
517 ];
518 let new_len = [
519 change.added_end().byte() - start.byte(),
520 change.added_end().char() - start.char(),
521 change.added_end().line() - start.line(),
522 ];
523
524 self.records.transform(start_rec, old_len, new_len);
525 self.records.insert(start_rec);
526 }
527
528 #[track_caller]
530 pub(super) fn add_record(&mut self, [b, c, l]: [usize; 3]) {
531 self.records.insert([b, c, l]);
532 }
533
534 pub fn get_contiguous(&self, range: impl TextRange) -> Option<&str> {
543 let range = range.to_range(self.len().byte());
544 let [s0, s1] = self.strs_inner(..).unwrap();
545
546 if range.end <= self.buf.gap() {
547 s0.get(range)
548 } else {
549 let gap = self.buf.gap();
550 s1.get(range.start.checked_sub(gap)?..range.end.checked_sub(gap)?)
551 }
552 }
553}
554
555pub struct Lines<'a> {
563 lines: [std::str::Lines<'a>; 2],
564 split_line: Option<String>,
565 fwd_i: usize,
566 rev_i: usize,
567 split_line_used: bool,
568}
569
570impl<'a> Lines<'a> {
571 fn new(
572 lines: [std::str::Lines<'a>; 2],
573 split_line: Option<String>,
574 fwd_i: usize,
575 rev_i: usize,
576 ) -> Self {
577 Self {
578 lines,
579 split_line,
580 fwd_i,
581 rev_i,
582 split_line_used: false,
583 }
584 }
585}
586
587impl<'a, 'text> Lending<'a> for Lines<'text> {
588 type Lend = (usize, &'a str);
589}
590
591impl<'a> Lender for Lines<'a> {
592 fn next(&mut self) -> Option<lender::Lend<'_, Self>> {
593 self.lines[0]
594 .next()
595 .or_else(|| {
596 if self.split_line_used {
597 None
598 } else {
599 self.split_line_used = true;
600 self.split_line.as_deref()
601 }
602 })
603 .or_else(|| self.lines[1].next())
604 .map(|line| {
605 self.fwd_i += 1;
606 (self.fwd_i - 1, line)
607 })
608 }
609
610 fn size_hint(&self) -> (usize, Option<usize>) {
611 (self.rev_i - self.fwd_i, Some(self.rev_i - self.fwd_i))
612 }
613}
614
615impl<'a> DoubleEndedLender for Lines<'a> {
616 fn next_back(&mut self) -> Option<lender::Lend<'_, Self>> {
617 self.lines[1]
618 .next_back()
619 .or_else(|| {
620 if self.split_line_used {
621 None
622 } else {
623 self.split_line_used = true;
624 self.split_line.as_deref()
625 }
626 })
627 .or_else(|| self.lines[0].next_back())
628 .map(|line| {
629 self.rev_i -= 1;
630 (self.rev_i, line)
631 })
632 }
633}
634
635impl<'a> ExactSizeLender for Lines<'a> {}
636
637#[derive(Clone)]
641pub struct Slices<'a>([std::slice::Iter<'a, u8>; 2]);
642
643impl<'a> Slices<'a> {
644 pub fn to_array(&self) -> [&'a [u8]; 2] {
646 self.0.clone().map(|iter| iter.as_slice())
647 }
648
649 pub fn try_to_string(self) -> Result<String, Utf8Error> {
658 let [s0, s1] = self.0.map(|arr| arr.as_slice());
659 Ok([str::from_utf8(s0)?, str::from_utf8(s1)?].join(""))
660 }
661
662 pub unsafe fn chars_unchecked(self) -> impl Iterator<Item = char> {
677 self.0
678 .into_iter()
679 .flat_map(|iter| unsafe { str::from_utf8_unchecked(iter.as_slice()) }.chars())
680 }
681}
682
683impl<'a> Iterator for Slices<'a> {
684 type Item = u8;
685
686 fn next(&mut self) -> Option<Self::Item> {
687 self.0[0].next().or_else(|| self.0[1].next()).copied()
688 }
689
690 fn size_hint(&self) -> (usize, Option<usize>) {
691 let (l0, u0) = self.0[0].size_hint();
692 let (l1, u1) = self.0[1].size_hint();
693 (l0 + l1, Some(u0.unwrap() + u1.unwrap()))
694 }
695}
696
697impl<'a> ExactSizeIterator for Slices<'a> {}
698
699impl<'a> DoubleEndedIterator for Slices<'a> {
700 fn next_back(&mut self) -> Option<Self::Item> {
701 self.0[1]
702 .next_back()
703 .or_else(|| self.0[0].next_back())
704 .copied()
705 }
706}
707
708#[derive(Clone)]
713pub struct Strs<'a> {
714 bytes: &'a Bytes,
715 range: Range<usize>,
716 arr: [&'a str; 2],
717 fwd: usize,
718 rev: usize,
719}
720
721impl<'a> Strs<'a> {
722 pub fn to_array(&self) -> [&'a str; 2] {
724 self.arr
725 }
726
727 pub fn chars(self) -> impl DoubleEndedIterator<Item = char> + 'a {
731 let [s0, s1] = self.arr;
732 s0.chars().chain(s1.chars())
733 }
734
735 pub fn contains<P: RegexPattern>(&self, pat: P) -> Result<bool, Box<regex_syntax::Error>> {
738 self.bytes
739 .search_fwd(pat, self.range.clone())
740 .map(|mut iter| iter.next().is_some())
741 }
742}
743
744impl Strs<'static> {
745 pub fn empty() -> Self {
747 Self {
748 bytes: &*EMPTY_BYTES,
749 range: 0..0,
750 arr: [""; 2],
751 fwd: 0,
752 rev: 0,
753 }
754 }
755}
756
757impl<'a> Iterator for Strs<'a> {
758 type Item = &'a str;
759
760 fn next(&mut self) -> Option<Self::Item> {
761 match self.fwd {
762 0 | 1 if self.fwd != self.rev => {
763 self.fwd += 1;
764 Some(self.arr[self.fwd - 1])
765 }
766 _ => None,
767 }
768 }
769
770 fn size_hint(&self) -> (usize, Option<usize>) {
771 (self.rev - self.fwd, Some(self.rev - self.fwd))
772 }
773}
774
775impl ExactSizeIterator for Strs<'_> {}
776
777impl DoubleEndedIterator for Strs<'_> {
778 fn next_back(&mut self) -> Option<Self::Item> {
779 match self.rev {
780 1 | 2 if self.fwd != self.rev => {
781 self.rev -= 1;
782 Some(self.arr[self.rev])
783 }
784 _ => None,
785 }
786 }
787}
788
789impl FusedIterator for Strs<'_> {}
790
791impl AsRef<Bytes> for Bytes {
792 fn as_ref(&self) -> &Bytes {
793 self
794 }
795}
796
797impl std::fmt::Display for Strs<'_> {
798 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
799 let [s0, s1] = self.to_array();
800 write!(f, "{s0}{s1}")
801 }
802}
803
804impl std::fmt::Debug for Bytes {
805 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
806 f.debug_struct("Bytes")
807 .field("buf", &self.strs_inner(..))
808 .field("records", &self.records)
809 .finish()
810 }
811}
812
813impl PartialEq for Bytes {
814 fn eq(&self, other: &Self) -> bool {
815 self.buf.as_slices() == other.buf.as_slices()
816 }
817}
818
819impl PartialEq<&str> for Bytes {
820 fn eq(&self, other: &&str) -> bool {
821 let [s0, s1] = self.strs_inner(..).unwrap();
822 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
823 }
824}
825
826impl PartialEq<String> for Bytes {
827 fn eq(&self, other: &String) -> bool {
828 let [s0, s1] = self.strs_inner(..).unwrap();
829 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
830 }
831}
832
833impl PartialEq for Strs<'_> {
834 fn eq(&self, other: &Self) -> bool {
835 self.to_array() == other.to_array()
836 }
837}
838
839impl PartialEq<&str> for Strs<'_> {
840 fn eq(&self, other: &&str) -> bool {
841 let [s0, s1] = self.to_array();
842 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
843 }
844}
845
846impl PartialEq<String> for Strs<'_> {
847 fn eq(&self, other: &String) -> bool {
848 let [s0, s1] = self.to_array();
849 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
850 }
851}
852
853impl PartialEq<Bytes> for &str {
854 fn eq(&self, other: &Bytes) -> bool {
855 other == self
856 }
857}
858
859impl PartialEq<Bytes> for String {
860 fn eq(&self, other: &Bytes) -> bool {
861 other == self
862 }
863}
864
865impl PartialEq<Strs<'_>> for &str {
866 fn eq(&self, other: &Strs) -> bool {
867 other == self
868 }
869}
870
871impl PartialEq<Strs<'_>> for String {
872 fn eq(&self, other: &Strs) -> bool {
873 other == self
874 }
875}
876#[must_use]
879#[inline]
880pub const fn utf8_char_width(b: u8) -> usize {
881 const UTF8_CHAR_WIDTH: &[u8; 256] = &[
883 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ];
901 UTF8_CHAR_WIDTH[b as usize] as usize
902}