1use std::{iter::FusedIterator, ops::RangeBounds, str::Utf8Error};
2
3use gapbuf::GapBuffer;
4use lender::{DoubleEndedLender, ExactSizeLender, Lender, Lending};
5
6use super::{Point, TextRange, records::Records};
7use crate::cfg::PrintCfg;
8
/// The bytes of a text, stored in a [`GapBuffer`].
///
/// Alongside the bytes, a set of [`Records`] is kept. Its entries are
/// `[byte, char, line]` triples: the largest one is the length of the
/// buffer (see [`Bytes::len`]) and the others are used as starting
/// points by the `point_at_*` lookups.
#[derive(Default, Clone)]
pub struct Bytes {
    // The raw bytes; exposed as (up to) two contiguous slices around the gap.
    buf: GapBuffer<u8>,
    // `[byte, char, line]` checkpoints into `buf`.
    records: Records,
}
17
18impl Bytes {
19 pub(crate) fn new(string: &str) -> Self {
21 let buf = GapBuffer::from_iter(string.bytes());
22
23 let len = buf.len();
24 let chars = string.chars().count();
25 let lines = buf.iter().filter(|b| **b == b'\n').count();
26 Self {
27 buf,
28 records: Records::new([len, chars, lines]),
29 }
30 }
31
32 pub fn len(&self) -> Point {
36 let [b, c, l] = self.records.max();
37 Point::from_raw(b, c, l)
38 }
39
40 pub fn is_empty(&self) -> bool {
55 let (s0, s1) = self.buf.as_slices();
56 (s0 == b"\n" && s1 == b"") || (s0 == b"" && s1 == b"\n")
57 }
58
59 pub fn char_at(&self, p: Point) -> Option<char> {
61 if p.byte() >= self.len().byte() {
62 return None;
63 }
64
65 let [s0, s1] = self.strs_inner(..).unwrap();
66 Some(if p.byte() < s0.len() {
67 s0[p.byte()..].chars().next().unwrap()
68 } else {
69 s1[p.byte() - s0.len()..]
70 .chars()
71 .next()
72 .unwrap_or_else(|| panic!("{self:#?}"))
73 })
74 }
75
76 pub fn buffers(&self, range: impl RangeBounds<usize>) -> Buffers<'_> {
92 let (s0, s1) = self.buf.range(range).as_slices();
93 Buffers([s0.iter(), s1.iter()])
94 }
95
96 pub fn strs(&self, range: impl TextRange) -> Option<Strs<'_>> {
139 let range = range.to_range(self.len().byte());
140 Some(Strs {
141 arr: self.strs_inner(range)?,
142 fwd: 0,
143 rev: 2,
144 })
145 }
146
147 pub fn lines(&self, range: impl TextRange) -> Lines<'_> {
154 let range = range.to_range(self.len().byte());
155 let start = self.point_at_line(self.point_at_byte(range.start).line());
156 let end = {
157 let end = self.point_at_byte(range.end);
158 let line_start = self.point_at_line(end.line());
159 match line_start == end {
160 true => end,
161 false => self.point_at_line((end.line() + 1).min(self.len().line())),
162 }
163 };
164
165 let (fwd_i, rev_i) = (start.line(), end.line());
168 if let Some(str) = self.get_contiguous(start..end) {
169 let lines = [str.lines(), "".lines()];
170 Lines::new(lines, None, fwd_i, rev_i)
171 } else if end.byte() > start.byte()
174 && self.buf[self.buf.gap() - 1] != b'\n'
175 && self.buf[self.buf.gap()] != b'\n'
176 {
177 let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
178 let lines = [s0.lines(), s1.lines()];
179 Lines::new(lines, None, fwd_i, rev_i)
180 } else {
183 let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
184
185 let (before, split0) = match s0.rsplit_once('\n') {
186 Some((before, split)) => (before, split),
187 None => ("", s0),
188 };
189 let (after, split1) = match s1.split_once('\n') {
190 Some((after, split)) => (after, split),
191 None => ("", s1),
192 };
193
194 let lines = [before.lines(), after.lines()];
195 let split_line = Some(split0.to_string() + split1);
196 Lines::new(lines, split_line, fwd_i, rev_i)
197 }
198 }
199
200 fn strs_inner(&self, range: impl RangeBounds<usize>) -> Option<[&str; 2]> {
202 let (start, end) = crate::get_ends(range, self.len().byte());
203 use std::str::from_utf8_unchecked;
204
205 let (s0, s1) = self.buf.as_slices();
206
207 if s0.first().is_some_and(|b| utf8_char_width(*b) == 0)
209 || s1.first().is_some_and(|b| utf8_char_width(*b) == 0)
210 || self.buf.get(end).is_some_and(|b| utf8_char_width(*b) == 0)
211 {
212 return None;
213 }
214
215 Some(unsafe {
216 let r0 = start.min(s0.len())..end.min(s0.len());
217 let r1 = start.saturating_sub(s0.len()).min(s1.len())
218 ..end.saturating_sub(s0.len()).min(s1.len());
219
220 [from_utf8_unchecked(&s0[r0]), from_utf8_unchecked(&s1[r1])]
221 })
222 }
223
/// The [`Point`] of the `char` that contains the byte `b`.
///
/// The search starts from the record closest to `b` and walks the
/// text forwards or backwards from there, keeping track of the byte,
/// `char` and line counts along the way. If the walk yields nothing,
/// the length of the buffer is returned.
///
/// # Panics
///
/// Panics if `b` is greater than the length in bytes, or if the text
/// around the closest record cannot be viewed as `str`s.
#[inline(always)]
pub fn point_at_byte(&self, b: usize) -> Point {
    assert!(
        b <= self.len().byte(),
        "byte out of bounds: the len is {}, but the byte is {b}",
        self.len().byte()
    );

    // Closest known `[byte, char, line]` checkpoint, keyed by byte.
    let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(b, |[b, ..]| b);

    let found = if b >= c_b {
        // Walk forwards from the record.
        let [s0, s1] = self.strs_inner(c_b..).unwrap();

        s0.char_indices()
            // Indices of the second `str` are offset by the first one.
            .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
            .enumerate()
            .map(|(i, (this_b, char))| {
                // A newline belongs to the line it ends, so the line
                // reported for it is the count from before the bump.
                c_l += (char == '\n') as usize;
                (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
            })
            // Keep the last `char` that starts at or before `b`.
            .take_while(|&(rhs, ..)| b >= rhs)
            .last()
    } else {
        // Walk backwards from the record.
        let mut c_len = 0;
        self.strs_inner(..c_b)
            .unwrap()
            .into_iter()
            .flat_map(str::chars)
            .rev()
            .enumerate()
            .map(|(i, char)| {
                c_l -= (char == '\n') as usize;
                c_len += char.len_utf8();
                (c_b - c_len, c_c - (i + 1), c_l)
            })
            // Keep the earliest `char` that starts at or after `b`.
            .take_while(|&(rhs, ..)| b <= rhs)
            .last()
    };

    found
        .map(|(b, c, l)| Point::from_raw(b, c, l))
        .unwrap_or(self.len())
}
277
/// The [`Point`] of the `c`th `char` of the buffer.
///
/// The search starts from the record closest to `c` and walks the
/// text forwards or backwards from there, keeping track of the byte,
/// `char` and line counts along the way. If the walk yields nothing,
/// the length of the buffer is returned.
///
/// # Panics
///
/// Panics if `c` is greater than the length in `char`s, or if the
/// text around the closest record cannot be viewed as `str`s.
#[inline(always)]
pub fn point_at_char(&self, c: usize) -> Point {
    assert!(
        c <= self.len().char(),
        "char out of bounds: the len is {}, but the char is {c}",
        self.len().char()
    );

    // Closest known `[byte, char, line]` checkpoint, keyed by char.
    let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(c, |[_, c, _]| c);

    let found = if c >= c_c {
        // Walk forwards from the record.
        let [s0, s1] = self.strs_inner(c_b..).unwrap();

        s0.char_indices()
            // Indices of the second `str` are offset by the first one.
            .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
            .enumerate()
            .map(|(i, (this_b, char))| {
                // A newline belongs to the line it ends, so the line
                // reported for it is the count from before the bump.
                c_l += (char == '\n') as usize;
                (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
            })
            // Keep the last entry whose char index is at most `c`.
            .take_while(|&(_, rhs, _)| c >= rhs)
            .last()
    } else {
        // Walk backwards from the record.
        let mut c_len = 0;
        self.strs_inner(..c_b)
            .unwrap()
            .into_iter()
            .flat_map(str::chars)
            .rev()
            .enumerate()
            .map(|(i, char)| {
                c_l -= (char == '\n') as usize;
                c_len += char.len_utf8();
                (c_b - c_len, c_c - (i + 1), c_l)
            })
            // Keep the earliest entry whose char index is at least `c`.
            .take_while(|&(_, rhs, _)| c <= rhs)
            .last()
    };

    found
        .map(|(b, c, l)| Point::from_raw(b, c, l))
        .unwrap_or(self.len())
}
327
/// The [`Point`] where the line `l` starts.
///
/// The record closest to `l` is first moved back to the start of its
/// own line; the text is then walked forwards or backwards from there
/// until line `l` is reached. If the walk yields nothing, the length
/// of the buffer is returned.
///
/// # Panics
///
/// Panics if `l` is greater than the length in lines, or if the text
/// around the closest record cannot be viewed as `str`s.
#[inline(always)]
pub fn point_at_line(&self, l: usize) -> Point {
    assert!(
        l <= self.len().line(),
        "line out of bounds: the len is {}, but the line is {l}",
        self.len().line()
    );

    let (c_b, c_c, mut c_l) = {
        // Closest known `[byte, char, line]` checkpoint, keyed by line.
        let [mut b, mut c, l] = self.records.closest_to_by_key(l, |[.., l]| l);
        // Rewind the checkpoint to the beginning of its line, by
        // stepping over every `char` up to the previous newline.
        self.strs_inner(..b)
            .unwrap()
            .into_iter()
            .flat_map(str::chars)
            .rev()
            .take_while(|c| *c != '\n')
            .for_each(|char| {
                b -= char.len_utf8();
                c -= 1;
            });
        (b, c, l)
    };

    let found = if l >= c_l {
        // Walk forwards from the start of the record's line.
        let [s0, s1] = self.strs_inner(c_b..).unwrap();

        s0.char_indices()
            // Indices of the second `str` are offset by the first one.
            .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
            .enumerate()
            .map(|(i, (this_b, char))| {
                // A newline belongs to the line it ends, so the line
                // reported for it is the count from before the bump.
                c_l += (char == '\n') as usize;
                (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
            })
            // The first `char` on line `l` is the start of that line.
            .find(|&(.., rhs)| l == rhs)
    } else {
        // Walk backwards from the start of the record's line.
        let mut c_len = 0;
        self.strs_inner(..c_b)
            .unwrap()
            .into_iter()
            .flat_map(str::chars)
            .rev()
            .enumerate()
            .map(|(i, char)| {
                c_l -= (char == '\n') as usize;
                c_len += char.len_utf8();
                (c_b - c_len, c_c - (i + 1), c_l)
            })
            // Keep the earliest entry that is still on line `l` or later.
            .take_while(|&(.., rhs)| l <= rhs)
            .last()
    };

    found
        .map(|(b, c, l)| Point::from_raw(b, c, l))
        .unwrap_or(self.len())
}
392
393 #[inline(always)]
404 pub fn points_of_line(&self, l: usize) -> [Point; 2] {
405 assert!(
406 l <= self.len().line(),
407 "byte out of bounds: the len is {}, but the line is {l}",
408 self.len().line()
409 );
410
411 let start = self.point_at_line(l);
412 let end = self
413 .chars_fwd(start..)
414 .unwrap()
415 .find_map(|(p, _)| (p.line() > start.line()).then_some(p))
416 .unwrap_or(start);
417 [start, end]
418 }
419
420 pub fn last_point(&self) -> Point {
429 let strs = self.strs_inner(..).unwrap();
430 let char = strs.into_iter().flat_map(str::chars).next_back().unwrap();
431 self.len().rev(char)
432 }
433
434 pub fn chars_fwd(
440 &self,
441 range: impl TextRange,
442 ) -> Option<impl Iterator<Item = (Point, char)> + '_> {
443 let range = range.to_range(self.len().byte());
444 let p = self.point_at_byte(range.start);
445 Some(self.strs(range)?.chars().scan(p, |p, char| {
446 let old_p = *p;
447 *p = p.fwd(char);
448 Some((old_p, char))
449 }))
450 }
451
452 pub fn chars_rev(
458 &self,
459 range: impl TextRange,
460 ) -> Option<impl Iterator<Item = (Point, char)> + '_> {
461 let range = range.to_range(self.len().byte());
462 let p = self.point_at_byte(range.end);
463 Some(self.strs(range)?.chars().rev().scan(p, |p, char| {
464 *p = p.rev(char);
465 Some((*p, char))
466 }))
467 }
468
469 pub fn indent(&self, p: Point, cfg: PrintCfg) -> usize {
471 let [start, _] = self.points_of_line(p.line());
472 self.chars_fwd(start..)
473 .unwrap()
474 .map_while(|(_, c)| match c {
475 ' ' => Some(1),
476 '\t' => Some(cfg.tab_stops.size() as usize),
477 _ => None,
478 })
479 .sum()
480 }
481
482 pub(crate) fn apply_change(&mut self, change: super::Change<&str>) {
488 let edit = change.added_str();
489 let start = change.start();
490
491 let range = start.byte()..change.taken_end().byte();
492 self.buf.splice(range, edit.bytes());
493
494 let start_rec = [start.byte(), start.char(), start.line()];
495 let old_len = [
496 change.taken_end().byte() - start.byte(),
497 change.taken_end().char() - start.char(),
498 change.taken_end().line() - start.line(),
499 ];
500 let new_len = [
501 change.added_end().byte() - start.byte(),
502 change.added_end().char() - start.char(),
503 change.added_end().line() - start.line(),
504 ];
505
506 self.records.transform(start_rec, old_len, new_len);
507 self.records.insert(start_rec);
508 }
509
510 pub(super) fn extend(&mut self, other: Self) {
512 self.buf.extend(other.buf);
513 self.records
514 .transform(self.records.max(), [0, 0, 0], other.records.max())
515 }
516
517 pub(super) fn add_record(&mut self, [b, c, l]: [usize; 3]) {
519 self.records.insert([b, c, l]);
520 }
521
522 pub fn get_contiguous(&self, range: impl TextRange) -> Option<&str> {
531 let range = range.to_range(self.len().byte());
532 let [s0, s1] = self.strs_inner(..).unwrap();
533
534 if range.end <= self.buf.gap() {
535 s0.get(range)
536 } else {
537 let gap = self.buf.gap();
538 s1.get(range.start.checked_sub(gap)?..range.end.checked_sub(gap)?)
539 }
540 }
541}
542
/// A lender over the lines of a [`Bytes`], created by [`Bytes::lines`].
///
/// Each lend is `(line_index, line)`.
pub struct Lines<'a> {
    // Lines lent before (`[0]`) and after (`[1]`) the `split_line`.
    lines: [std::str::Lines<'a>; 2],
    // An owned line, lent in between the two iterators of `lines`.
    split_line: Option<String>,
    // Index of the next line to be lent from the front.
    fwd_i: usize,
    // One past the index of the next line to be lent from the back.
    rev_i: usize,
    // Set once `split_line` has been visited, from either end.
    split_line_used: bool,
}
557
558impl<'a> Lines<'a> {
559 fn new(
560 lines: [std::str::Lines<'a>; 2],
561 split_line: Option<String>,
562 fwd_i: usize,
563 rev_i: usize,
564 ) -> Self {
565 Self {
566 lines,
567 split_line,
568 fwd_i,
569 rev_i,
570 split_line_used: false,
571 }
572 }
573}
574
// Each lend is a line index paired with the line, which may borrow
// from the lender itself (see the `split_line` field).
impl<'a, 'text> Lending<'a> for Lines<'text> {
    type Lend = (usize, &'a str);
}
578
579impl<'a> Lender for Lines<'a> {
580 fn next(&mut self) -> Option<lender::Lend<'_, Self>> {
581 self.lines[0]
582 .next()
583 .or_else(|| {
584 if self.split_line_used {
585 None
586 } else {
587 self.split_line_used = true;
588 self.split_line.as_deref()
589 }
590 })
591 .or_else(|| self.lines[1].next())
592 .map(|line| {
593 self.fwd_i += 1;
594 (self.fwd_i - 1, line)
595 })
596 }
597
598 fn size_hint(&self) -> (usize, Option<usize>) {
599 (self.rev_i - self.fwd_i, Some(self.rev_i - self.fwd_i))
600 }
601}
602
603impl<'a> DoubleEndedLender for Lines<'a> {
604 fn next_back(&mut self) -> Option<lender::Lend<'_, Self>> {
605 self.lines[1]
606 .next_back()
607 .or_else(|| {
608 if self.split_line_used {
609 None
610 } else {
611 self.split_line_used = true;
612 self.split_line.as_deref()
613 }
614 })
615 .or_else(|| self.lines[0].next_back())
616 .map(|line| {
617 self.rev_i -= 1;
618 (self.rev_i, line)
619 })
620 }
621}
622
// `size_hint` reports the same value for both bounds.
impl<'a> ExactSizeLender for Lines<'a> {}
624
/// A double ended iterator over the bytes of two slices, one after
/// the other.
#[derive(Clone)]
pub struct Buffers<'a>([std::slice::Iter<'a, u8>; 2]);

impl<'a> Buffers<'a> {
    /// The bytes that were not iterated over yet, as two slices.
    pub fn to_array(&self) -> [&'a [u8]; 2] {
        [self.0[0].as_slice(), self.0[1].as_slice()]
    }

    /// Tries to turn the remaining bytes into a [`String`].
    ///
    /// The two slices are validated as a single sequence, so a `char`
    /// whose bytes are divided between them is still accepted.
    ///
    /// # Errors
    ///
    /// Returns a [`Utf8Error`] if the remaining bytes, taken together,
    /// are not valid UTF-8.
    pub fn try_to_string(self) -> Result<String, Utf8Error> {
        let [s0, s1] = self.to_array();

        let mut bytes = Vec::with_capacity(s0.len() + s1.len());
        bytes.extend_from_slice(s0);
        bytes.extend_from_slice(s1);

        String::from_utf8(bytes).map_err(|err| err.utf8_error())
    }

    /// An iterator over the `char`s of the remaining bytes.
    ///
    /// # Safety
    ///
    /// Each of the two remaining slices must, on its own, be valid
    /// UTF-8: they are converted to `str`s without any validation.
    pub unsafe fn chars_unchecked(self) -> impl Iterator<Item = char> + 'a {
        let [s0, s1] = self.to_array();
        // SAFETY: the caller guarantees that both slices are valid UTF-8.
        let (s0, s1) = unsafe {
            (
                std::str::from_utf8_unchecked(s0),
                std::str::from_utf8_unchecked(s1),
            )
        };
        s0.chars().chain(s1.chars())
    }
}

impl<'a> Iterator for Buffers<'a> {
    type Item = u8;

    fn next(&mut self) -> Option<Self::Item> {
        // Drain the first slice before moving on to the second one.
        self.0[0].next().or_else(|| self.0[1].next()).copied()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.0[0].len() + self.0[1].len();
        (remaining, Some(remaining))
    }
}

impl<'a> ExactSizeIterator for Buffers<'a> {}

impl<'a> DoubleEndedIterator for Buffers<'a> {
    fn next_back(&mut self) -> Option<Self::Item> {
        // Drain the second slice before moving on to the first one.
        self.0[1]
            .next_back()
            .or_else(|| self.0[0].next_back())
            .copied()
    }
}
695
/// An iterator over two `&str`s, one after the other.
#[derive(Clone)]
pub struct Strs<'a> {
    // The two `str`s, in order.
    arr: [&'a str; 2],
    // Index of the next `str` to yield from the front.
    fwd: usize,
    // One past the index of the next `str` to yield from the back.
    rev: usize,
}

impl<'a> Strs<'a> {
    /// Both `str`s, regardless of how far the iteration went.
    pub fn to_array(&self) -> [&'a str; 2] {
        self.arr
    }

    /// An iterator over the `char`s of both `str`s, in order.
    pub fn chars(self) -> impl DoubleEndedIterator<Item = char> + 'a {
        let [first, second] = self.arr;
        first.chars().chain(second.chars())
    }
}

impl<'a> Iterator for Strs<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        // Nothing is left once the cursors meet, or past the array.
        if self.fwd == self.rev || self.fwd > 1 {
            return None;
        }

        let item = self.arr[self.fwd];
        self.fwd += 1;
        Some(item)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.rev - self.fwd;
        (remaining, Some(remaining))
    }
}

impl ExactSizeIterator for Strs<'_> {}

impl DoubleEndedIterator for Strs<'_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        // Nothing is left once the cursors meet, or outside the array.
        if self.fwd == self.rev || !(1..=2).contains(&self.rev) {
            return None;
        }

        self.rev -= 1;
        Some(self.arr[self.rev])
    }
}

impl FusedIterator for Strs<'_> {}
755
// Lets a `Bytes` be passed wherever an `AsRef<Bytes>` is accepted.
impl AsRef<Bytes> for Bytes {
    fn as_ref(&self) -> &Bytes {
        self
    }
}
761
762impl std::fmt::Display for Strs<'_> {
763 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
764 let [s0, s1] = self.to_array();
765 write!(f, "{s0}{s1}")
766 }
767}
768
769impl std::fmt::Debug for Bytes {
770 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
771 f.debug_struct("Bytes")
772 .field("buf", &self.strs_inner(..))
773 .field("records", &self.records)
774 .finish()
775 }
776}
777
778impl PartialEq for Bytes {
779 fn eq(&self, other: &Self) -> bool {
780 self.buf.as_slices() == other.buf.as_slices()
781 }
782}
783
784impl PartialEq<&str> for Bytes {
785 fn eq(&self, other: &&str) -> bool {
786 let [s0, s1] = self.strs_inner(..).unwrap();
787 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
788 }
789}
790
791impl PartialEq<String> for Bytes {
792 fn eq(&self, other: &String) -> bool {
793 let [s0, s1] = self.strs_inner(..).unwrap();
794 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
795 }
796}
797
798impl PartialEq for Strs<'_> {
799 fn eq(&self, other: &Self) -> bool {
800 self.to_array() == other.to_array()
801 }
802}
803
804impl PartialEq<&str> for Strs<'_> {
805 fn eq(&self, other: &&str) -> bool {
806 let [s0, s1] = self.to_array();
807 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
808 }
809}
810
811impl PartialEq<String> for Strs<'_> {
812 fn eq(&self, other: &String) -> bool {
813 let [s0, s1] = self.to_array();
814 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
815 }
816}
817
/// The width, in bytes, of the UTF-8 `char` that starts with `b`.
///
/// Returns 0 if `b` cannot be the first byte of a `char`, which is
/// the case for continuation bytes and for bytes that never show up
/// in UTF-8.
#[must_use]
#[inline]
pub const fn utf8_char_width(b: u8) -> usize {
    match b {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // 0x80..=0xC1 and 0xF5..=0xFF
        _ => 0,
    }
}