1use std::{
2 ops::{ControlFlow, Range, RangeBounds},
3 str::Utf8Error,
4};
5
6use gapbuf::GapBuffer;
7use lender::{DoubleEndedLender, ExactSizeLender, Lender, Lending};
8
9pub use crate::text::bytes::strs::Strs;
10use crate::{
11 buffer::Change,
12 opts::PrintOpts,
13 text::{Point, TextIndex, TextRange, records::Records, utils::implPartialEq},
14};
15
16mod strs;
17
/// Text storage backed by a gap buffer of UTF-8 bytes.
///
/// Alongside the raw bytes it keeps [`Records`] of `[byte, char, line]`
/// triples, which the `point_at_*` methods use as starting points when
/// translating an index into a [`Point`].
#[derive(Default, Clone)]
pub struct Bytes {
    /// The raw bytes of the text.
    buf: GapBuffer<u8>,
    /// Known `[byte, char, line]` positions inside `buf`.
    records: Records,
    /// NOTE(review): only zeroed in `new` within this file; presumably a
    /// change counter maintained by the parent module — confirm.
    pub(super) bytes_state: u64,
}
27
28impl Bytes {
29 #[doc(hidden)]
33 pub(crate) fn new(string: &str) -> Self {
34 let buf = GapBuffer::from_iter(string.bytes());
35
36 let len = buf.len();
37 let chars = string.chars().count();
38 let lines = buf.iter().filter(|b| **b == b'\n').count();
39 Self {
40 buf,
41 records: Records::new([len, chars, lines]),
42 bytes_state: 0,
43 }
44 }
45
46 pub fn len(&self) -> Point {
50 let [b, c, l] = self.records.max();
51 Point::from_raw(b, c, l)
52 }
53
54 pub fn is_empty(&self) -> bool {
69 let (s0, s1) = self.buf.as_slices();
70 (s0 == b"\n" && s1 == b"") || (s0 == b"" && s1 == b"\n")
71 }
72
73 pub fn char_at(&self, p: impl TextIndex) -> Option<char> {
75 if p.to_byte_index() >= self.len().byte() {
76 return None;
77 }
78
79 let [s0, s1] = self.strs_inner(..).unwrap();
80 Some(if p.to_byte_index() < s0.len() {
81 s0[p.to_byte_index()..].chars().next().unwrap()
82 } else {
83 s1[p.to_byte_index() - s0.len()..]
84 .chars()
85 .next()
86 .unwrap_or_else(|| panic!("{self:#?}"))
87 })
88 }
89
90 pub fn strs(&self, range: impl TextRange) -> Option<Strs<'_>> {
140 let range = range.to_range(self.len().byte());
141
142 Some(Strs::new(
143 self,
144 (range.start, range.end),
145 self.strs_inner(range)?,
146 ))
147 }
148
149 #[track_caller]
165 pub fn slices(&self, range: impl TextRange) -> Slices<'_> {
166 let (s0, s1) = self
167 .buf
168 .range(range.to_range(self.len().byte()))
169 .as_slices();
170 Slices([s0.iter(), s1.iter()])
171 }
172
173 #[track_caller]
180 pub fn lines(&self, range: impl TextRange) -> Lines<'_> {
181 let range = range.to_range(self.len().byte());
182 let start = self.point_at_line(self.point_at_byte(range.start).line());
183 let end = {
184 let end = self.point_at_byte(range.end);
185 let line_start = self.point_at_line(end.line());
186 if line_start == end {
187 end
188 } else {
189 self.point_at_line((end.line() + 1).min(self.len().line()))
190 }
191 };
192
193 let (fwd_i, rev_i) = (start.line(), end.line());
196 if let Some(str) = self.get_contiguous(start..end) {
197 let lines = [str.lines(), "".lines()];
198 Lines::new(lines, None, fwd_i, rev_i)
199 } else if end.byte() > start.byte() && self.buf[self.buf.gap() - 1] == b'\n' {
202 let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
203 let lines = [s0.lines(), s1.lines()];
204 Lines::new(lines, None, fwd_i, rev_i)
205 } else {
208 let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
209
210 let (before, split0) = match s0.rsplit_once('\n') {
211 Some((before, split)) => (before, split),
212 None => ("", s0),
213 };
214 let (after, split1) = match s1.split_once('\n') {
215 Some((split, after)) => (after, split),
216 None => ("", s1),
217 };
218
219 let lines = [before.lines(), after.lines()];
220 let split_line = Some(split0.to_string() + split1);
221 Lines::new(lines, split_line, fwd_i, rev_i)
222 }
223 }
224
225 #[track_caller]
227 fn strs_inner(&self, range: impl RangeBounds<usize>) -> Option<[&str; 2]> {
228 let range = crate::utils::get_range(range, self.len().byte());
229 use std::str::from_utf8_unchecked;
230
231 let (s0, s1) = self.buf.as_slices();
232
233 if s0.first().is_some_and(|b| utf8_char_width(*b) == 0)
235 || s1.first().is_some_and(|b| utf8_char_width(*b) == 0)
236 || self
237 .buf
238 .get(range.end)
239 .is_some_and(|b| utf8_char_width(*b) == 0)
240 {
241 return None;
242 }
243
244 Some(unsafe {
245 let r0 = range.start.min(s0.len())..range.end.min(s0.len());
246 let r1 = range.start.saturating_sub(s0.len()).min(s1.len())
247 ..range.end.saturating_sub(s0.len()).min(s1.len());
248
249 [from_utf8_unchecked(&s0[r0]), from_utf8_unchecked(&s1[r1])]
250 })
251 }
252
253 #[inline(always)]
264 #[track_caller]
265 pub fn point_at_byte(&self, b: usize) -> Point {
266 assert!(
267 b <= self.len().byte(),
268 "byte out of bounds: the len is {}, but the byte is {b}",
269 self.len().byte()
270 );
271
272 let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(b, |[b, ..]| b);
273
274 let found = if b >= c_b {
275 let [s0, s1] = self.strs_inner(c_b..).unwrap();
276
277 s0.char_indices()
278 .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
279 .enumerate()
280 .map(|(i, (this_b, char))| {
281 c_l += (char == '\n') as usize;
282 (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
283 })
284 .take_while(|&(rhs, ..)| b >= rhs)
285 .last()
286 } else {
287 let mut c_len = 0;
288 self.strs_inner(..c_b)
289 .unwrap()
290 .into_iter()
291 .flat_map(str::chars)
292 .rev()
293 .enumerate()
294 .map(|(i, char)| {
295 c_l -= (char == '\n') as usize;
296 c_len += char.len_utf8();
297 (c_b - c_len, c_c - (i + 1), c_l)
298 })
299 .take_while(|&(rhs, ..)| b <= rhs)
300 .last()
301 };
302
303 found
304 .map(|(b, c, l)| Point::from_raw(b, c, l))
305 .unwrap_or(self.len())
306 }
307
308 #[inline(always)]
315 #[track_caller]
316 pub fn point_at_char(&self, c: usize) -> Point {
317 assert!(
318 c <= self.len().char(),
319 "char out of bounds: the len is {}, but the char is {c}",
320 self.len().char()
321 );
322
323 let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(c, |[_, c, _]| c);
324
325 let found = if c >= c_c {
326 let [s0, s1] = self.strs_inner(c_b..).unwrap();
327
328 s0.char_indices()
329 .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
330 .enumerate()
331 .map(|(i, (this_b, char))| {
332 c_l += (char == '\n') as usize;
333 (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
334 })
335 .take_while(|&(_, rhs, _)| c >= rhs)
336 .last()
337 } else {
338 let mut c_len = 0;
339 self.strs_inner(..c_b)
340 .unwrap()
341 .into_iter()
342 .flat_map(str::chars)
343 .rev()
344 .enumerate()
345 .map(|(i, char)| {
346 c_l -= (char == '\n') as usize;
347 c_len += char.len_utf8();
348 (c_b - c_len, c_c - (i + 1), c_l)
349 })
350 .take_while(|&(_, rhs, _)| c <= rhs)
351 .last()
352 };
353
354 found
355 .map(|(b, c, l)| Point::from_raw(b, c, l))
356 .unwrap_or(self.len())
357 }
358
359 #[inline(always)]
369 #[track_caller]
370 pub fn point_at_line(&self, l: usize) -> Point {
371 assert!(
372 l <= self.len().line(),
373 "line out of bounds: the len is {}, but the line is {l}",
374 self.len().line()
375 );
376
377 let (c_b, c_c, mut c_l) = {
378 let [b, c, l] = self.records.closest_to_by_key(l, |[.., l]| l);
379 let (b, c) = self
380 .strs_inner(..b)
381 .unwrap()
382 .into_iter()
383 .flat_map(str::chars)
384 .rev()
385 .take_while(|c| *c != '\n')
386 .fold((b, c), |(b, c), char| (b - char.len_utf8(), c - 1));
387 (b, c, l)
388 };
389
390 let found = if l >= c_l {
391 let [s0, s1] = self.strs_inner(c_b..).unwrap();
392
393 s0.char_indices()
394 .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
395 .enumerate()
396 .map(|(i, (this_b, char))| {
397 c_l += (char == '\n') as usize;
398 (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
399 })
400 .find(|&(.., rhs)| l == rhs)
401 } else {
402 let mut c_len = 0;
403 self.strs_inner(..c_b)
404 .unwrap()
405 .into_iter()
406 .flat_map(str::chars)
407 .rev()
408 .enumerate()
409 .map(|(i, char)| {
410 c_l -= (char == '\n') as usize;
411 c_len += char.len_utf8();
412 (c_b - c_len, c_c - (i + 1), c_l)
413 })
414 .take_while(|&(.., rhs)| l <= rhs)
415 .last()
416 };
417
418 found
419 .map(|(b, c, l)| Point::from_raw(b, c, l))
420 .unwrap_or(self.len())
421 }
422
423 #[inline(always)]
434 #[track_caller]
435 pub fn line_range(&self, l: usize) -> Range<Point> {
436 assert!(
437 l <= self.len().line(),
438 "line out of bounds: the len is {}, but the line is {l}",
439 self.len().line()
440 );
441
442 let start = self.point_at_line(l);
443 let (ControlFlow::Continue(end) | ControlFlow::Break(end)) = self
444 .chars_fwd(start..)
445 .unwrap()
446 .try_fold(start, |end, (_, char)| match end.line() == start.line() {
447 true => ControlFlow::Continue(end.fwd(char)),
448 false => ControlFlow::Break(end),
449 });
450
451 start..end
452 }
453
454 pub fn last_point(&self) -> Point {
463 self.len().rev('\n')
464 }
465
466 #[track_caller]
472 pub fn chars_fwd(
473 &self,
474 range: impl TextRange,
475 ) -> Option<impl Iterator<Item = (usize, char)> + '_> {
476 let mut range = range.to_range(self.len().byte());
477 Some(self.strs(range.clone())?.chars().map(move |char| {
478 let byte = range.start;
479 range.start += char.len_utf8();
480 (byte, char)
481 }))
482 }
483
484 #[track_caller]
490 pub fn chars_rev(
491 &self,
492 range: impl TextRange,
493 ) -> Option<impl Iterator<Item = (usize, char)> + '_> {
494 let mut range = range.to_range(self.len().byte());
495 Some(self.strs(range.clone())?.chars().rev().map(move |char| {
496 range.end -= char.len_utf8();
497 (range.end, char)
498 }))
499 }
500
501 pub fn indent(&self, p: Point, opts: PrintOpts) -> usize {
503 let range = self.line_range(p.line());
504 self.chars_fwd(range.start..)
505 .unwrap()
506 .map_while(|(_, c)| match c {
507 ' ' => Some(1),
508 '\t' => Some(opts.tabstop as usize),
509 _ => None,
510 })
511 .sum()
512 }
513
514 pub fn replace_range(&mut self, range: impl TextRange, new: impl AsRef<str>) {
522 let edit = new.as_ref();
523 let range = range.to_range(self.len().byte());
524
525 let start = self.point_at_byte(range.start);
526 let taken_len = self.point_at_byte(range.end) - start;
527 let added_len = Point::len_of(edit);
528
529 self.buf.splice(range, edit.bytes());
530
531 let start_rec = [start.byte(), start.char(), start.line()];
532 let old_len = [taken_len.byte(), taken_len.char(), taken_len.line()];
533 let new_len = [added_len.byte(), added_len.char(), added_len.line()];
534
535 self.records.transform(start_rec, old_len, new_len);
536 self.records.insert(start_rec);
537 }
538
539 pub(crate) fn apply_change(&mut self, change: Change<&str>) {
541 let edit = change.added_str();
542 let start = change.start();
543
544 let range = start.byte()..change.taken_end().byte();
545 self.buf.splice(range, edit.bytes());
546
547 let start_rec = [start.byte(), start.char(), start.line()];
548 let old_len = [
549 change.taken_end().byte() - start.byte(),
550 change.taken_end().char() - start.char(),
551 change.taken_end().line() - start.line(),
552 ];
553 let new_len = [
554 change.added_end().byte() - start.byte(),
555 change.added_end().char() - start.char(),
556 change.added_end().line() - start.line(),
557 ];
558
559 self.records.transform(start_rec, old_len, new_len);
560 self.records.insert(start_rec);
561 }
562
563 #[track_caller]
565 pub(crate) fn add_record(&mut self, [b, c, l]: [usize; 3]) {
566 self.records.insert([b, c, l]);
567 }
568
569 pub fn get_contiguous(&self, range: impl TextRange) -> Option<&str> {
578 let range = range.to_range(self.len().byte());
579 let [s0, s1] = self.strs_inner(..).unwrap();
580
581 if range.end <= self.buf.gap() {
582 s0.get(range)
583 } else {
584 let gap = self.buf.gap();
585 s1.get(range.start.checked_sub(gap)?..range.end.checked_sub(gap)?)
586 }
587 }
588}
589
/// A double-ended lender over the lines of a [`Bytes`], yielding
/// `(line index, line text)` pairs.
pub struct Lines<'b> {
    /// Line iterators for the text before and after the split line.
    lines: [std::str::Lines<'b>; 2],
    /// An owned line sitting between the two iterators, if any.
    split_line: Option<String>,
    /// Index given to the next line yielded from the front.
    fwd_i: usize,
    /// One past the index given to the next line yielded from the back.
    rev_i: usize,
    /// Whether the `split_line` slot has already been visited.
    split_line_used: bool,
}
604
605impl<'b> Lines<'b> {
606 fn new(
607 lines: [std::str::Lines<'b>; 2],
608 split_line: Option<String>,
609 fwd_i: usize,
610 rev_i: usize,
611 ) -> Self {
612 Self {
613 lines,
614 split_line,
615 fwd_i,
616 rev_i,
617 split_line_used: false,
618 }
619 }
620}
621
// Each lend is a line index paired with a `&str` tied to the borrow of the
// lender, since the split line is owned by the `Lines` itself.
impl<'b, 'text> Lending<'b> for Lines<'text> {
    type Lend = (usize, &'b str);
}
625
626impl<'b> Lender for Lines<'b> {
627 fn next(&mut self) -> Option<lender::Lend<'_, Self>> {
628 self.lines[0]
629 .next()
630 .or_else(|| {
631 if self.split_line_used {
632 None
633 } else {
634 self.split_line_used = true;
635 self.split_line.as_deref()
636 }
637 })
638 .or_else(|| self.lines[1].next())
639 .map(|line| {
640 self.fwd_i += 1;
641 (self.fwd_i - 1, line)
642 })
643 }
644
645 fn size_hint(&self) -> (usize, Option<usize>) {
646 (self.rev_i - self.fwd_i, Some(self.rev_i - self.fwd_i))
647 }
648}
649
650impl<'b> DoubleEndedLender for Lines<'b> {
651 fn next_back(&mut self) -> Option<lender::Lend<'_, Self>> {
652 self.lines[1]
653 .next_back()
654 .or_else(|| {
655 if self.split_line_used {
656 None
657 } else {
658 self.split_line_used = true;
659 self.split_line.as_deref()
660 }
661 })
662 .or_else(|| self.lines[0].next_back())
663 .map(|line| {
664 self.rev_i -= 1;
665 (self.rev_i, line)
666 })
667 }
668}
669
// `size_hint` reports `rev_i - fwd_i` as both bounds, which is what this
// marker impl relies on.
impl<'b> ExactSizeLender for Lines<'b> {}
671
/// A byte iterator over two slices, yielding every byte of the first
/// slice followed by every byte of the second.
#[derive(Clone)]
pub struct Slices<'b>(pub(super) [std::slice::Iter<'b, u8>; 2]);
677
678impl<'b> Slices<'b> {
679 pub fn to_array(&self) -> [&'b [u8]; 2] {
681 self.0.clone().map(|iter| iter.as_slice())
682 }
683
684 pub fn try_to_string(self) -> Result<String, Utf8Error> {
693 let [s0, s1] = self.0.map(|arr| arr.as_slice());
694 Ok([str::from_utf8(s0)?, str::from_utf8(s1)?].join(""))
695 }
696
697 pub unsafe fn chars_unchecked(self) -> impl Iterator<Item = char> {
712 self.0
713 .into_iter()
714 .flat_map(|iter| unsafe { str::from_utf8_unchecked(iter.as_slice()) }.chars())
715 }
716}
717
718impl<'b> Iterator for Slices<'b> {
719 type Item = u8;
720
721 fn next(&mut self) -> Option<Self::Item> {
722 self.0[0].next().or_else(|| self.0[1].next()).copied()
723 }
724
725 fn size_hint(&self) -> (usize, Option<usize>) {
726 let (l0, u0) = self.0[0].size_hint();
727 let (l1, u1) = self.0[1].size_hint();
728 (l0 + l1, Some(u0.unwrap() + u1.unwrap()))
729 }
730}
731
// `size_hint` sums the hints of the two slice iterators, so it is exact.
impl<'b> ExactSizeIterator for Slices<'b> {}
733
734impl<'b> DoubleEndedIterator for Slices<'b> {
735 fn next_back(&mut self) -> Option<Self::Item> {
736 self.0[1]
737 .next_back()
738 .or_else(|| self.0[0].next_back())
739 .copied()
740 }
741}
742
/// The length in bytes of the UTF-8 character introduced by `b`.
///
/// Returns 0 for bytes that cannot start a character: continuation bytes
/// (`0x80..=0xBF`) and the invalid leads `0xC0`, `0xC1` and
/// `0xF5..=0xFF`.
#[must_use]
#[inline]
pub const fn utf8_char_width(b: u8) -> usize {
    match b {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => 0,
    }
}
770
771impl Eq for Bytes {}
772implPartialEq!(bytes: Bytes, other: Bytes, {
773 let (l_s0, l_s1) = bytes.buf.as_slices();
774 let (r_s0, r_s1) = other.buf.as_slices();
775 (l_s0.len() + l_s1.len() == r_s0.len() + r_s1.len()) && l_s0.iter().chain(l_s1).eq(r_s0.iter().chain(r_s1))
776});
777implPartialEq!(bytes: Bytes, other: &str, {
778 let [s0, s1] = bytes.strs_inner(..).unwrap();
779 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
780});
781implPartialEq!(bytes: Bytes, other: String, bytes == &&other.as_str());
782implPartialEq!(str: &str, other: Bytes, other == *str);
783implPartialEq!(string: String, other: Bytes, other == *string);
784
785impl Eq for Strs<'_> {}
786implPartialEq!(strs: Strs<'_>, other: Strs<'_>, {
787 let [l_s0, l_s1] = strs.to_array();
788 let [r_s0, r_s1] = other.to_array();
789 (l_s0.len() + l_s1.len() == r_s0.len() + r_s1.len()) && l_s0.bytes().chain(l_s1.bytes()).eq(r_s0.bytes().chain(r_s1.bytes()))
790});
791implPartialEq!(strs: Strs<'_>, other: &str, {
792 let [s0, s1] = strs.to_array();
793 other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
794});
795implPartialEq!(strs: Strs<'_>, other: String, strs == &&other.as_str());
796implPartialEq!(str: &str, other: Strs<'_>, other == *str);
797implPartialEq!(string: String, other: Strs<'_>, other == *string);
798
799macro_rules! implFromToString {
801 ($T:ty) => {
802 impl From<$T> for Bytes {
803 fn from(value: $T) -> Self {
804 let string = <$T as ToString>::to_string(&value);
805 Bytes::new(&string)
806 }
807 }
808 };
809}
810
811implFromToString!(u8);
812implFromToString!(u16);
813implFromToString!(u32);
814implFromToString!(u64);
815implFromToString!(u128);
816implFromToString!(usize);
817implFromToString!(i8);
818implFromToString!(i16);
819implFromToString!(i32);
820implFromToString!(i64);
821implFromToString!(i128);
822implFromToString!(isize);
823implFromToString!(f32);
824implFromToString!(f64);
825implFromToString!(char);
826implFromToString!(&str);
827implFromToString!(String);
828implFromToString!(Box<str>);
829implFromToString!(std::rc::Rc<str>);
830implFromToString!(std::sync::Arc<str>);
831implFromToString!(std::borrow::Cow<'_, str>);
832implFromToString!(std::io::Error);
833implFromToString!(Box<dyn std::error::Error>);
834
835impl From<std::path::PathBuf> for Bytes {
836 fn from(value: std::path::PathBuf) -> Self {
837 let value = value.to_string_lossy();
838 Self::from(value)
839 }
840}
841
842impl From<&std::path::Path> for Bytes {
843 fn from(value: &std::path::Path) -> Self {
844 let value = value.to_string_lossy();
845 Self::from(value)
846 }
847}
848
849impl std::fmt::Debug for Bytes {
850 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
851 f.debug_struct("Bytes")
852 .field("buf", &self.strs_inner(..))
853 .field("records", &self.records)
854 .finish()
855 }
856}