1extern crate alloc;
4
5use core::ops::Range;
6
7use memchr::memchr;
8
9use crate::util::{self, is_blank, is_space, trim_left_space, utf8_len, TinyVec};
10use alloc::borrow::Cow;
11use alloc::string::String;
12use alloc::vec::Vec;
13
14#[allow(unused_imports)]
15#[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
16use crate::println;
17
/// A single ASCII space; its first byte is what the readers report for pending padding
/// and what `Segment::bytes` uses to materialize padding.
const SPACE: &[u8] = b" ";
19
20#[derive(Debug, Clone)]
29#[non_exhaustive]
30pub enum Value {
31 Index(Index),
33
34 String(String),
36}
37
38impl Value {
39 pub fn bytes<'a>(&'a self, source: &'a str) -> &'a [u8] {
41 match self {
42 Value::Index(index) => index.bytes(source),
43 Value::String(s) => s.as_bytes(),
44 }
45 }
46
47 pub fn str<'a>(&'a self, source: &'a str) -> &'a str {
49 match self {
50 Value::Index(index) => index.str(source),
51 Value::String(s) => s.as_str(),
52 }
53 }
54
55 pub fn is_empty(&self) -> bool {
57 match self {
58 Value::Index(index) => index.is_empty(),
59 Value::String(s) => s.is_empty(),
60 }
61 }
62
63 pub fn len(&self) -> usize {
65 match self {
66 Value::Index(index) => index.len(),
67 Value::String(s) => s.len(),
68 }
69 }
70}
71
72impl From<&str> for Value {
73 fn from(s: &str) -> Self {
74 Value::String(String::from(s))
75 }
76}
77
78impl From<String> for Value {
79 fn from(s: String) -> Self {
80 Value::String(s)
81 }
82}
83
84impl From<&[u8]> for Value {
85 fn from(s: &[u8]) -> Self {
86 Value::String(String::from_utf8_lossy(s).into_owned())
87 }
88}
89
90impl From<Vec<u8>> for Value {
91 fn from(s: Vec<u8>) -> Self {
92 Value::String(String::from_utf8_lossy(&s).into_owned())
93 }
94}
95
96impl From<&[char]> for Value {
97 fn from(s: &[char]) -> Self {
98 Value::String(s.iter().collect())
99 }
100}
101
102impl From<Cow<'_, [u8]>> for Value {
103 fn from(s: Cow<'_, [u8]>) -> Self {
104 Value::String(String::from_utf8_lossy(&s).into_owned())
105 }
106}
107
108impl From<Cow<'_, str>> for Value {
109 fn from(s: Cow<'_, str>) -> Self {
110 Value::String(s.into_owned())
111 }
112}
113
114impl From<&Value> for Value {
115 fn from(v: &Value) -> Self {
116 match v {
117 Value::Index(index) => Value::Index(*index),
118 Value::String(s) => Value::String(s.clone()),
119 }
120 }
121}
122
123impl From<(usize, usize)> for Value {
124 fn from((start, stop): (usize, usize)) -> Self {
125 Value::Index(Index::new(start, stop))
126 }
127}
128
129impl From<Segment> for Value {
130 fn from(segment: Segment) -> Self {
131 Value::Index(Index::new(segment.start(), segment.stop()))
132 }
133}
/// A byte range `start..stop` into a source string.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Index {
    /// Byte offset of the first byte in the range.
    start: usize,

    /// Byte offset one past the last byte in the range.
    stop: usize,
}

impl Index {
    /// Creates an index covering `start..stop`.
    pub fn new(start: usize, stop: usize) -> Self {
        Index { start, stop }
    }

    /// Returns the start offset.
    #[inline(always)]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the stop offset (exclusive).
    #[inline(always)]
    pub fn stop(&self) -> usize {
        self.stop
    }

    /// Returns the bytes of `source` covered by this index.
    ///
    /// # Panics
    ///
    /// Panics if `start..stop` is not a valid range of `source`.
    #[inline(always)]
    pub fn bytes<'a>(&self, source: &'a str) -> &'a [u8] {
        &source.as_bytes()[self.start..self.stop]
    }

    /// Returns the part of `source` covered by this index without bounds or boundary checks.
    ///
    /// The range must lie within `source` and on UTF-8 character boundaries.
    #[inline(always)]
    pub fn str<'a>(&self, source: &'a str) -> &'a str {
        // SAFETY: relies on the invariant that `start..stop` is in bounds of `source` and
        // falls on char boundaries; nothing in this function verifies it.
        unsafe { source.get_unchecked(self.start..self.stop) }
    }

    /// Returns `true` when the range covers no bytes (including `start > stop`).
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.start >= self.stop
    }

    /// Returns a copy of this index with `start` replaced by `v`.
    #[inline(always)]
    pub fn with_start(&self, v: usize) -> Index {
        Index::new(v, self.stop)
    }

    /// Returns a copy of this index with `stop` replaced by `v`.
    #[inline(always)]
    pub fn with_stop(&self, v: usize) -> Index {
        Index::new(self.start, v)
    }

    /// Returns the number of bytes covered by the range.
    ///
    /// An inverted range (`start > stop`) has length 0, matching [`Index::is_empty`],
    /// instead of overflowing.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.stop.saturating_sub(self.start)
    }
}
203
204impl From<Index> for Value {
205 fn from(index: Index) -> Self {
206 Value::Index(index)
207 }
208}
209
210impl From<(usize, usize)> for Index {
211 fn from((start, stop): (usize, usize)) -> Self {
212 Index::new(start, stop)
213 }
214}
215
216impl From<Segment> for Index {
217 fn from(segment: Segment) -> Self {
218 Index::new(segment.start(), segment.stop())
219 }
220}
221
222#[derive(Debug, Clone, Default)]
231#[non_exhaustive]
232pub enum MultilineValue {
233 #[default]
234 Empty,
235 Indices(TinyVec<Index>),
236 String(String),
237}
238
239impl MultilineValue {
240 pub fn from_index(index: Index) -> Self {
242 MultilineValue::Indices(TinyVec::from_single(index))
243 }
244
245 pub fn from_indices(indices: Vec<Index>) -> Self {
247 MultilineValue::Indices(TinyVec::from_vec(indices))
248 }
249
250 pub fn from_string(s: String) -> Self {
252 MultilineValue::String(s)
253 }
254
255 pub fn str<'a>(&'a self, source: &'a str) -> Cow<'a, str> {
259 match self {
260 MultilineValue::Empty => Cow::Borrowed(""),
261 MultilineValue::Indices(indices) => {
262 let first = indices.get(0);
263 let second = indices.get(1);
264 if let Some(f) = first {
265 if second.is_none() {
266 return Cow::Borrowed(f.str(source));
267 }
268 } else {
269 return Cow::Borrowed("");
270 }
271 let mut result = String::new();
272 result.push_str(first.unwrap().str(source));
273 let b = second.unwrap().bytes(source);
274 result.push_str(unsafe { core::str::from_utf8_unchecked(trim_left_space(b)) });
275 for v in indices.iter().skip(2) {
276 let b = v.bytes(source);
277 result.push_str(unsafe { core::str::from_utf8_unchecked(trim_left_space(b)) });
278 }
279 Cow::Owned(result)
280 }
281 MultilineValue::String(s) => Cow::Borrowed(s.as_str()),
282 }
283 }
284
285 pub fn bytes<'a>(&'a self, source: &'a str) -> Cow<'a, [u8]> {
289 match self {
290 MultilineValue::Empty => Cow::Borrowed(&[]),
291 MultilineValue::Indices(indices) => {
292 let first = indices.get(0);
293 let second = indices.get(1);
294 if let Some(f) = first {
295 if second.is_none() {
296 return Cow::Borrowed(f.bytes(source));
297 }
298 } else {
299 return Cow::Borrowed(&[]);
300 }
301 let mut result = Vec::new();
302 result.extend_from_slice(first.unwrap().bytes(source));
303 result.extend_from_slice(trim_left_space(second.unwrap().bytes(source)));
304 for v in indices.iter().skip(2) {
305 result.extend_from_slice(trim_left_space(v.bytes(source)));
306 }
307 Cow::Owned(result)
308 }
309 MultilineValue::String(s) => Cow::Borrowed(s.as_bytes()),
310 }
311 }
312}
313
314impl From<String> for MultilineValue {
315 fn from(s: String) -> Self {
316 MultilineValue::String(s)
317 }
318}
319
320impl From<&String> for MultilineValue {
321 fn from(s: &String) -> Self {
322 MultilineValue::String(s.clone())
323 }
324}
325
326impl From<&str> for MultilineValue {
327 fn from(s: &str) -> Self {
328 MultilineValue::String(String::from(s))
329 }
330}
331
332impl From<&[u8]> for MultilineValue {
333 fn from(s: &[u8]) -> Self {
334 MultilineValue::String(String::from_utf8_lossy(s).into_owned())
335 }
336}
337
338impl From<Vec<u8>> for MultilineValue {
339 fn from(s: Vec<u8>) -> Self {
340 MultilineValue::String(String::from_utf8_lossy(&s).into_owned())
341 }
342}
343
344impl From<Cow<'_, str>> for MultilineValue {
345 fn from(s: Cow<'_, str>) -> Self {
346 MultilineValue::String(s.into_owned())
347 }
348}
349
350impl From<Cow<'_, [u8]>> for MultilineValue {
351 fn from(s: Cow<'_, [u8]>) -> Self {
352 MultilineValue::String(String::from_utf8_lossy(&s).into_owned())
353 }
354}
355
356impl From<Value> for MultilineValue {
357 fn from(v: Value) -> Self {
358 match v {
359 Value::Index(index) => MultilineValue::Indices(TinyVec::from_single(index)),
360 Value::String(s) => MultilineValue::String(s),
361 }
362 }
363}
364
365impl From<Segment> for MultilineValue {
366 fn from(segment: Segment) -> Self {
367 MultilineValue::Indices(TinyVec::from_single(segment.into()))
368 }
369}
370
371impl From<TinyVec<Index>> for MultilineValue {
372 fn from(indices: TinyVec<Index>) -> Self {
373 MultilineValue::Indices(indices)
374 }
375}
376
377#[derive(Debug, Clone, Default)]
390#[non_exhaustive]
391pub enum Lines {
392 #[default]
393 Empty,
394 Segments(Vec<Segment>),
395 String(String),
396}
397
398impl Lines {
399 pub fn from_segments(segments: Vec<Segment>) -> Self {
401 Lines::Segments(segments)
402 }
403
404 pub fn from_string(s: String) -> Self {
406 Lines::String(s)
407 }
408
409 pub fn iter<'a>(&'a self, source: &'a str) -> impl Iterator<Item = Cow<'a, str>> {
411 LinesIter::new(
412 match self {
413 Lines::Empty => LinesIterState::Empty,
414 Lines::Segments(segments) => LinesIterState::Segments(segments.iter()),
415 Lines::String(s) => LinesIterState::String(s.split_inclusive('\n')),
416 },
417 source,
418 )
419 }
420}
421
422impl From<String> for Lines {
423 fn from(s: String) -> Self {
424 Lines::String(s)
425 }
426}
427
428impl From<&String> for Lines {
429 fn from(s: &String) -> Self {
430 Lines::String(s.clone())
431 }
432}
433
434impl From<&str> for Lines {
435 fn from(s: &str) -> Self {
436 Lines::String(String::from(s))
437 }
438}
439
440impl From<&[u8]> for Lines {
441 fn from(s: &[u8]) -> Self {
442 Lines::String(String::from_utf8_lossy(s).into_owned())
443 }
444}
445
446impl From<Vec<Segment>> for Lines {
447 fn from(segments: Vec<Segment>) -> Self {
448 Lines::Segments(segments)
449 }
450}
451
452impl From<&[Segment]> for Lines {
453 fn from(segments: &[Segment]) -> Self {
454 Lines::Segments(segments.to_vec())
455 }
456}
457
458enum LinesIterState<'a> {
459 Empty,
460 Segments(core::slice::Iter<'a, Segment>),
461 String(core::str::SplitInclusive<'a, char>),
462}
463
464struct LinesIter<'a> {
466 state: LinesIterState<'a>,
467 source: &'a str,
468}
469
470impl<'a> LinesIter<'a> {
471 pub fn new(state: LinesIterState<'a>, source: &'a str) -> Self {
473 LinesIter { state, source }
474 }
475}
476
477impl<'a> Iterator for LinesIter<'a> {
478 type Item = Cow<'a, str>;
479
480 #[inline(always)]
481 fn next(&mut self) -> Option<Self::Item> {
482 match &mut self.state {
483 LinesIterState::Empty => None,
484 LinesIterState::Segments(iter) => iter.next().map(|segment| segment.str(self.source)),
485 LinesIterState::String(iter) => iter.next().map(Cow::Borrowed),
486 }
487 }
488}
489
/// A block of text expressed as a slice of [`Segment`]s; `BlockReader` treats each element
/// as one line.
pub type Block = [Segment];
498
499fn binary_search_block_pos(block: &Block, pos: usize) -> Option<usize> {
500 let mut left = 0;
501 let mut right = block.len();
502 while left < right {
503 let mid = (left + right) / 2;
504 if block[mid].start() <= pos && pos < block[mid].stop() {
505 return Some(mid);
506 }
507 if pos < block[mid].start() {
508 right = mid;
509 } else {
510 left = mid + 1;
511 }
512 }
513 None
514}
515
516pub trait BlockExt {
518 fn to_values(&self) -> MultilineValue;
520}
521
522impl BlockExt for Block {
523 fn to_values(&self) -> MultilineValue {
524 let first = self.first();
525 let second = self.get(1);
526 if let Some(f) = first {
527 if second.is_none() {
528 return MultilineValue::from_index((f.start(), f.stop()).into());
529 }
530 } else {
531 return MultilineValue::default();
532 }
533 let mut result = Vec::with_capacity(self.len());
534 for v in self.iter() {
535 result.push((v.start(), v.stop()).into());
536 }
537 MultilineValue::from_indices(result)
538 }
539}
540
541pub(crate) fn block_to_values(i: impl IntoIterator<Item = Segment>) -> MultilineValue {
542 let mut b = i.into_iter();
543 let first = b.next();
544 let second = b.next();
545 if let Some(f) = first {
546 if second.is_none() {
547 return MultilineValue::from_index(f.into());
548 }
549 } else {
550 return MultilineValue::default();
551 }
552 let mut result = Vec::with_capacity(2 + b.size_hint().0);
553 result.push(first.unwrap().into());
554 result.push(second.unwrap().into());
555 for segment in b {
556 result.push(segment.into());
557 }
558 MultilineValue::from_indices(result)
559}
560
561#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
568pub struct Segment {
569 start: usize,
570
571 stop: usize,
572
573 padding: u8,
574
575 force_newline: bool,
576}
577
578impl Segment {
579 pub fn new(start: usize, stop: usize) -> Self {
581 Segment {
582 start,
583 stop,
584 padding: 0,
585 force_newline: false,
586 }
587 }
588
589 pub fn new_with_padding(start: usize, stop: usize, padding: usize) -> Self {
591 Segment {
592 start,
593 stop,
594 padding: padding as u8,
595 force_newline: false,
596 }
597 }
598
599 #[inline(always)]
601 pub fn start(&self) -> usize {
602 self.start
603 }
604
605 #[inline(always)]
607 pub fn stop(&self) -> usize {
608 self.stop
609 }
610
611 #[inline(always)]
616 pub fn padding(&self) -> usize {
617 self.padding as usize
618 }
619
620 #[inline(always)]
622 pub fn force_newline(&self) -> bool {
623 self.force_newline
624 }
625
626 pub fn bytes<'a>(&self, source: &'a str) -> Cow<'a, [u8]> {
628 if self.padding == 0
629 && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
630 {
631 Cow::Borrowed(&source.as_bytes()[self.start..self.stop])
632 } else {
633 let mut result = Vec::with_capacity(self.padding() + self.stop - self.start + 1);
634 result.extend(core::iter::repeat_n(SPACE[0], self.padding()));
635 result.extend_from_slice(&source.as_bytes()[self.start..self.stop]);
636 if self.force_newline && !result.is_empty() && *result.last().unwrap() != b'\n' {
637 result.push(b'\n');
638 }
639 Cow::Owned(result)
640 }
641 }
642
643 pub fn str<'a>(&self, source: &'a str) -> Cow<'a, str> {
648 if self.padding == 0
649 && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
650 {
651 unsafe { Cow::Borrowed(source.get_unchecked(self.start..self.stop)) }
652 } else {
653 let mut result = String::with_capacity(self.padding() + self.stop - self.start + 1);
654 result.extend(core::iter::repeat_n(' ', self.padding()));
655 unsafe { result.push_str(source.get_unchecked(self.start..self.stop)) };
656 if self.force_newline && !result.is_empty() && result.as_bytes().last() != Some(&b'\n')
657 {
658 result.push('\n');
659 }
660 Cow::Owned(result)
661 }
662 }
663
664 #[inline(always)]
666 pub fn len(&self) -> usize {
667 self.stop - self.start + self.padding()
668 }
669
670 pub fn between(&self, other: Segment) -> Segment {
672 if self.stop != other.stop {
673 panic!("invalid state");
674 }
675 Segment::new_with_padding(
676 self.start,
677 other.start,
678 (self.padding - other.padding) as usize,
679 )
680 }
681
682 #[inline(always)]
684 pub fn is_empty(&self) -> bool {
685 self.start >= self.stop && self.padding == 0
686 }
687
688 pub fn is_blank(&self, source: &str) -> bool {
690 let v = &source.as_bytes()[self.start..self.stop];
691 is_blank(v)
692 }
693
694 pub fn trim_right_space(&self, source: &str) -> Segment {
696 let v = &source.as_bytes()[self.start..self.stop];
697 let l = util::trim_right_space_length(v);
698 if l == v.len() {
699 Segment::new(self.start, self.start)
700 } else {
701 Segment::new_with_padding(self.start, self.stop - l, self.padding as usize)
702 }
703 }
704
705 pub fn trim_left_space(&self, source: &str) -> Segment {
707 let v = &source.as_bytes()[self.start..self.stop];
708 let l = util::trim_left_space_length(v);
709 Segment::new(self.start + l, self.stop)
710 }
711
712 pub fn trim_left_space_width(&self, mut width: isize, source: &str) -> Segment {
715 let mut padding = self.padding as isize;
716 while width > 0 && padding > 0 {
717 width -= 1;
718 padding -= 1;
719 }
720 if width == 0 {
721 return Segment::new_with_padding(self.start, self.stop, padding as usize);
722 }
723 let v = &source.as_bytes()[self.start..self.stop];
724 let mut start = self.start;
725 for &c in v {
726 if start >= self.stop - 1 || width == 0 {
727 break;
728 }
729 if c == b' ' {
730 width -= 1;
731 } else if c == b'\t' {
732 width -= 4;
733 } else {
734 break;
735 }
736 start += 1;
737 }
738 if width < 0 {
739 padding = -width;
740 }
741 Segment::new_with_padding(start, self.stop, padding as usize)
742 }
743
744 #[inline(always)]
746 pub fn with_start(&self, v: usize) -> Segment {
747 Segment::new_with_padding(v, self.stop, self.padding as usize)
748 }
749
750 #[inline(always)]
752 pub fn with_stop(&self, v: usize) -> Segment {
753 Segment::new_with_padding(self.start, v, self.padding as usize)
754 }
755
756 #[inline(always)]
758 pub fn with_padding(&self, v: usize) -> Segment {
759 Segment::new_with_padding(self.start, self.stop, v)
760 }
761
762 #[inline(always)]
764 pub fn with_force_newline(&self, v: bool) -> Segment {
765 Segment {
766 start: self.start,
767 stop: self.stop,
768 padding: self.padding,
769 force_newline: v,
770 }
771 }
772
773 #[inline(always)]
775 pub fn to_index(&self) -> Index {
776 Index::new(self.start, self.stop)
777 }
778}
779
780impl From<(usize, usize)> for Segment {
781 fn from((start, stop): (usize, usize)) -> Self {
782 Segment::new(start, stop)
783 }
784}
785
786impl From<(usize, usize, usize)> for Segment {
787 fn from((start, stop, padding): (usize, usize, usize)) -> Self {
788 Segment::new_with_padding(start, stop, padding)
789 }
790}
791
792impl From<Index> for Segment {
793 fn from(index: Index) -> Self {
794 Segment::new(index.start(), index.stop())
795 }
796}
797
798impl From<Segment> for Range<usize> {
799 fn from(segment: Segment) -> Self {
800 segment.start()..segment.stop()
801 }
802}
803
/// Sentinel byte that `Reader::peek_byte` returns when there is nothing left to read.
pub const EOS: u8 = 0xff;
812
813pub trait Reader<'a> {
815 fn source(&self) -> &'a str;
817
818 fn position(&self) -> (usize, Segment);
820
821 fn reset_position(&mut self);
823
824 fn set_position(&mut self, line: usize, pos: Segment);
826
827 fn set_padding(&mut self, padding: usize);
829
830 fn peek_byte(&self) -> u8;
833
834 fn peek_line_segment(&self) -> Option<Segment>;
837
838 fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)>;
841
842 fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)>;
845
846 fn advance(&mut self, n: usize);
848
849 fn advance_and_set_padding(&mut self, n: usize, padding: usize);
852
853 fn advance_line(&mut self);
855
856 fn advance_to_eol(&mut self);
860
861 fn line_offset(&mut self) -> usize;
863
864 fn precending_charater(&self) -> char;
866
867 fn skip_blank_lines(&mut self) -> Option<(Cow<'a, [u8]>, Segment)> {
870 loop {
871 match self.peek_line_bytes() {
872 None => return None,
873 Some((line, seg)) => {
874 if is_blank(&line) {
875 self.advance_line();
876 continue;
877 }
878 return Some((line, seg));
879 }
880 }
881 }
882 }
883
884 fn skip_while<F>(&mut self, mut f: F) -> usize
886 where
887 F: FnMut(u8) -> bool,
888 {
889 let mut i = 0usize;
890 loop {
891 let b = self.peek_byte();
892 if b == EOS {
893 break;
894 }
895 if f(b) {
896 i += 1;
897 self.advance(1);
898 continue;
899 }
900 break;
901 }
902 i
903 }
904
905 fn skip_spaces(&mut self) -> usize {
907 self.skip_while(is_space)
908 }
909}
910
911pub struct BasicReader<'a> {
915 source: &'a str,
916 bsource: &'a [u8],
917 source_length: usize,
918 line: Option<usize>,
919 pos: Segment,
920 head: usize,
921 line_offset: Option<usize>,
922}
923
924impl<'a> BasicReader<'a> {
925 pub fn new(source: &'a str) -> Self {
927 let bsource: &[u8] = source.as_bytes();
928 let source_length = bsource.len();
929 let mut b = BasicReader {
930 source,
931 bsource,
932 source_length,
933 line: None,
934 pos: Segment::new(0, 0),
935 head: 0,
936 line_offset: None,
937 };
938 b.reset_position();
939 b
940 }
941
942 pub unsafe fn new_unchecked(source: &'a [u8]) -> Self {
947 Self::new(core::str::from_utf8_unchecked(source))
948 }
949}
950
951impl<'a> Reader<'a> for BasicReader<'a> {
952 fn source(&self) -> &'a str {
953 self.source
954 }
955
956 fn position(&self) -> (usize, Segment) {
957 (self.line.unwrap_or(0), self.pos)
958 }
959
960 fn reset_position(&mut self) {
961 self.line = None;
962 self.head = 0;
963 self.line_offset = None;
964 self.advance_line();
965 }
966
967 fn set_position(&mut self, line: usize, pos: Segment) {
968 self.line = Some(line);
969 self.pos = pos;
970 self.head = pos.start;
971 self.line_offset = None;
972 }
973
974 fn set_padding(&mut self, padding: usize) {
975 self.pos.padding = padding as u8;
976 }
977
978 fn peek_byte(&self) -> u8 {
979 if self.source_length == 0 {
980 return EOS;
981 }
982 if self.pos.padding() != 0 {
983 return SPACE[0];
984 }
985 if self.pos.start() < self.source_length {
986 return self.bsource[self.pos.start()];
987 }
988 EOS
989 }
990
991 fn peek_line_segment(&self) -> Option<Segment> {
992 if self.source_length == 0 {
993 return None;
994 }
995 if self.pos.start() < self.source_length {
996 return Some(self.pos);
997 }
998 None
999 }
1000
1001 fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
1002 if self.source_length == 0 {
1003 return None;
1004 }
1005 if self.pos.start() < self.source_length {
1006 return Some((self.pos.bytes(self.source), self.pos));
1007 }
1008 None
1009 }
1010
1011 fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
1012 if self.source_length == 0 {
1013 return None;
1014 }
1015 if self.pos.start() < self.source_length {
1016 return Some((self.pos.str(self.source), self.pos));
1017 }
1018 None
1019 }
1020
1021 fn advance(&mut self, n: usize) {
1022 if self.source_length == 0 {
1023 return;
1024 }
1025
1026 self.line_offset = None;
1027 if n < self.pos.len() && self.pos.padding() == 0 {
1028 self.pos.start += n;
1029 return;
1030 }
1031 let mut n = n;
1032 while n > 0 && self.pos.start < self.source_length {
1033 if self.pos.padding != 0 {
1034 self.pos.padding -= 1;
1035 n -= 1;
1036 continue;
1037 }
1038 if self.bsource[self.pos.start] == b'\n' {
1039 self.advance_line();
1040 n -= 1;
1041 continue;
1042 }
1043
1044 self.pos.start += 1;
1045 n -= 1;
1046 }
1047 }
1048
1049 fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
1050 self.advance(n);
1051 if padding > self.pos.padding() {
1052 self.set_padding(padding);
1053 }
1054 }
1055
1056 fn advance_line(&mut self) {
1057 self.line_offset = None;
1058 if self.source_length == 0 || self.pos.start >= self.source_length {
1059 return;
1060 }
1061
1062 if self.line.is_some() {
1063 self.pos.start = self.pos.stop;
1064 if self.pos.start >= self.source_length {
1065 return;
1066 }
1067 self.pos.stop = self.source_length;
1068 if self.bsource[self.pos.start] != b'\n' {
1069 if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
1070 self.pos.stop = self.pos.start + i + 1;
1071 }
1072 } else {
1073 self.pos.stop = self.pos.start + 1;
1074 }
1075 self.line = Some(self.line.unwrap() + 1);
1076 } else {
1077 if let Some(i) = memchr(b'\n', self.bsource) {
1078 self.pos = (0, i + 1).into();
1079 } else {
1080 self.pos = (0, self.source_length).into();
1081 }
1082 self.line = Some(0);
1083 }
1084 self.head = self.pos.start;
1085 self.pos.padding = 0;
1086 }
1087
1088 fn advance_to_eol(&mut self) {
1089 if self.source_length == 0 || self.pos.start >= self.source_length {
1090 return;
1091 }
1092
1093 self.line_offset = None;
1094 if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
1095 self.pos.start += i;
1096 } else {
1097 self.pos.start = self.source_length;
1098 }
1099 self.pos.padding = 0;
1100 }
1101
1102 fn line_offset(&mut self) -> usize {
1103 if self.line_offset.is_none() {
1104 let mut v = 0;
1105 for i in self.head..self.pos.start {
1106 if self.bsource[i] == b'\t' {
1107 v += util::tab_width(v);
1108 } else {
1109 v += 1;
1110 }
1111 }
1112 v -= self.pos.padding();
1113 self.line_offset = Some(v);
1114 }
1115 self.line_offset.unwrap_or(0)
1116 }
1117
1118 fn precending_charater(&self) -> char {
1119 if self.pos.padding() != 0 {
1120 return ' ';
1121 }
1122 if self.pos.start() == 0 {
1123 return '\n';
1124 }
1125 let mut i = self.pos.start() - 1;
1126 loop {
1127 if let Some(l) = utf8_len(self.bsource[i]) {
1128 if l == 1 {
1129 return self.bsource[i] as char;
1130 }
1131 return str::from_utf8(&self.bsource[i..i + l])
1132 .ok()
1133 .and_then(|s| s.chars().next())
1134 .unwrap_or('\u{FFFD}');
1135 }
1136 i -= 1;
1137 if i == 0 {
1138 break;
1139 }
1140 }
1141 '\u{FFFD}'
1142 }
1143}
1144
1145pub struct BlockReader<'a> {
1151 source: &'a str,
1152 bsource: &'a [u8],
1153 block: &'a Block,
1154 line: Option<usize>,
1155 pos: Segment,
1156 head: usize,
1157 last: usize,
1158 line_offset: Option<usize>,
1159}
1160
1161impl<'a> BlockReader<'a> {
1162 pub fn new(source: &'a str, block: &'a Block) -> Self {
1164 let mut b = BlockReader {
1165 source,
1166 bsource: source.as_bytes(),
1167 block,
1168 line: None,
1169 pos: Segment::new(0, 0),
1170 head: 0,
1171 last: 0,
1172 line_offset: None,
1173 };
1174 b.reset(block);
1175 b
1176 }
1177
1178 pub unsafe fn new_unchecked(source: &'a [u8], block: &'a Block) -> Self {
1183 Self::new(core::str::from_utf8_unchecked(source), block)
1184 }
1185
1186 pub fn reset(&mut self, lines: &'a Block) {
1188 self.block = lines;
1189 self.reset_position();
1190 }
1191
1192 pub fn between_current(&mut self, line: usize, pos: Segment) -> MultilineValue {
1195 if line == self.line.unwrap_or(0) {
1196 let seg = self.block[line];
1197 if pos.start() >= seg.start() && self.pos.start() <= seg.stop() {
1198 return block_to_values(BetweenBlockIterator::single(
1199 pos.start()..self.pos.start(),
1200 ));
1201 }
1202 }
1203 block_to_values(BetweenBlockIterator::multi(
1204 BlockReader {
1205 source: self.source,
1206 bsource: self.bsource,
1207 block: self.block,
1208 line: self.line,
1209 pos: self.pos,
1210 head: self.head,
1211 last: self.last,
1212 line_offset: self.line_offset,
1213 },
1214 line,
1215 pos,
1216 ))
1217 }
1218
1219 pub fn between(&self, range: Range<usize>) -> MultilineValue {
1221 let from_line = binary_search_block_pos(self.block, range.start).unwrap_or(0);
1222 let mut from_pos = self.block[from_line];
1223 if range.start >= from_pos.start() && range.end <= from_pos.stop() {
1224 return block_to_values(BetweenBlockIterator::single(range));
1225 }
1226 let to_line =
1227 binary_search_block_pos(self.block, range.end).unwrap_or(self.block.len() - 1);
1228 let mut to_pos = self.block[to_line];
1229 to_pos.start = range.end;
1230 from_pos.start = range.start;
1231
1232 block_to_values(BetweenBlockIterator::multi(
1233 BlockReader {
1234 source: self.source,
1235 bsource: self.bsource,
1236 block: self.block,
1237 line: Some(to_line),
1238 pos: to_pos,
1239 head: 0,
1240 last: 0,
1241 line_offset: None,
1242 },
1243 from_line,
1244 from_pos,
1245 ))
1246 }
1247}
1248
1249struct MultilineBetweenBlock<'a> {
1250 reader: BlockReader<'a>,
1251 start_line: usize,
1252 start_pos: Segment,
1253 current_line: usize,
1254 current_pos: Segment,
1255}
1256
1257struct BetweenBlockIterator<'a> {
1258 multi: Option<MultilineBetweenBlock<'a>>,
1259 single: Option<Range<usize>>,
1260 done: bool,
1261}
1262
1263impl<'a> BetweenBlockIterator<'a> {
1264 fn multi(mut reader: BlockReader<'a>, line: usize, pos: Segment) -> BetweenBlockIterator<'a> {
1265 let (current_line, current_pos) = reader.position();
1266 reader.set_position(line, pos);
1267 BetweenBlockIterator {
1268 multi: Some(MultilineBetweenBlock {
1269 reader,
1270 start_line: line,
1271 start_pos: pos,
1272 current_line,
1273 current_pos,
1274 }),
1275 single: None,
1276 done: false,
1277 }
1278 }
1279
1280 fn single(range: Range<usize>) -> BetweenBlockIterator<'a> {
1281 BetweenBlockIterator {
1282 multi: None,
1283 single: Some(range),
1284 done: false,
1285 }
1286 }
1287}
1288
1289impl<'a> Iterator for BetweenBlockIterator<'a> {
1290 type Item = Segment;
1291
1292 fn next(&mut self) -> Option<Self::Item> {
1293 if self.done {
1294 return None;
1295 }
1296 if let Some(s) = &self.single {
1297 self.done = true;
1298 return Some((s.start, s.end).into());
1299 }
1300 if let Some(m) = &mut self.multi {
1301 let (ln, _) = m.reader.position();
1302 let (_, segment) = m.reader.peek_line_bytes()?;
1303 let start = if ln == m.start_line {
1304 m.start_pos.start()
1305 } else {
1306 segment.start()
1307 };
1308 let stop = if ln == m.current_line {
1309 m.current_pos.start()
1310 } else {
1311 segment.stop()
1312 };
1313 let seg = Segment::new(start, stop);
1314 if ln == m.current_line {
1315 m.reader.advance(stop - start);
1316 self.done = true;
1317 }
1318 m.reader.advance_line();
1319 return Some(seg);
1320 }
1321 None
1322 }
1323}
1324
1325impl<'a> Reader<'a> for BlockReader<'a> {
1326 fn source(&self) -> &'a str {
1327 self.source
1328 }
1329
1330 fn position(&self) -> (usize, Segment) {
1331 (self.line.unwrap_or(0), self.pos)
1332 }
1333
1334 fn reset_position(&mut self) {
1335 self.line = None;
1336 self.head = 0;
1337 self.last = 0;
1338 self.line_offset = None;
1339 self.pos.start = 0;
1340 self.pos.stop = 0;
1341 self.pos.padding = 0;
1342 self.pos.force_newline = false;
1343 if let Some(l) = self.block.last() {
1344 self.last = l.stop;
1345 }
1346 self.advance_line();
1347 }
1348
1349 fn set_position(&mut self, line: usize, pos: Segment) {
1350 self.line_offset = None;
1351 self.line = Some(line);
1352 self.pos = pos;
1353 if line < self.block.len() {
1354 self.head = self.block[line].start;
1355 }
1356 }
1357
1358 fn set_padding(&mut self, padding: usize) {
1359 self.line_offset = None;
1360 self.pos.padding = padding as u8;
1361 }
1362
1363 fn peek_byte(&self) -> u8 {
1364 if self.bsource.is_empty() || self.block.is_empty() {
1365 return EOS;
1366 }
1367 if self.pos.padding() != 0 {
1368 return SPACE[0];
1369 }
1370 let l = self.line.unwrap();
1371 if self.pos.is_empty() {
1372 if l < self.block.len() - 1 {
1373 let next = &self.block[l + 1];
1374 if next.padding() != 0 {
1375 return SPACE[0];
1376 }
1377 if next.start < self.bsource.len() {
1378 return self.bsource[next.start];
1379 }
1380 }
1381 return EOS;
1382 } else if self.pos.start < self.bsource.len() {
1383 return self.bsource[self.pos.start];
1384 }
1385 EOS
1386 }
1387
1388 fn peek_line_segment(&self) -> Option<Segment> {
1389 if self.bsource.is_empty() || self.block.is_empty() {
1390 return None;
1391 }
1392 let l = self.line.unwrap();
1393 if self.pos.is_empty() {
1394 if l < self.block.len() - 1 {
1395 let s = self.block[l + 1].start;
1396 if s < self.bsource.len() {
1397 return Some(self.block[l + 1]);
1398 }
1399 }
1400 return None;
1401 } else if self.pos.start < self.bsource.len() {
1402 return Some(self.pos);
1403 }
1404 None
1405 }
1406
1407 fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
1408 if self.bsource.is_empty() || self.block.is_empty() {
1409 return None;
1410 }
1411 let l = self.line.unwrap();
1412 if self.pos.is_empty() {
1413 if l < self.block.len() - 1 {
1414 let s = self.block[l + 1].start;
1415 if s < self.bsource.len() {
1416 return Some((self.block[l + 1].bytes(self.source), self.block[l + 1]));
1417 }
1418 }
1419 return None;
1420 } else if self.pos.start < self.bsource.len() {
1421 return Some((self.pos.bytes(self.source), self.pos));
1422 }
1423 None
1424 }
1425
1426 fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
1427 if self.bsource.is_empty() || self.block.is_empty() {
1428 return None;
1429 }
1430 let l = self.line.unwrap();
1431 if self.pos.is_empty() {
1432 if l < self.block.len() - 1 {
1433 let s = self.block[l + 1].start;
1434 if s < self.bsource.len() {
1435 return Some((self.block[l + 1].str(self.source), self.block[l + 1]));
1436 }
1437 }
1438 return None;
1439 } else if self.pos.start < self.bsource.len() {
1440 return Some((self.pos.str(self.source), self.pos));
1441 }
1442 None
1443 }
1444
1445 fn advance(&mut self, n: usize) {
1446 if self.bsource.is_empty() || self.block.is_empty() {
1447 return;
1448 }
1449 self.line_offset = None;
1450 if n < self.pos.len() && self.pos.padding() == 0 {
1451 self.pos.start += n;
1452 return;
1453 }
1454 let mut n = n;
1455 while n > 0 && self.pos.start < self.last {
1456 if self.pos.padding != 0 {
1457 self.pos.padding -= 1;
1458 n -= 1;
1459 continue;
1460 }
1461 if self.pos.start >= self.pos.stop - 1 && self.pos.stop < self.last {
1462 self.advance_line();
1463 n -= 1;
1464 continue;
1465 }
1466
1467 self.pos.start += 1;
1468 n -= 1;
1469 }
1470 }
1471
1472 fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
1473 self.advance(n);
1474 if padding > self.pos.padding() {
1475 self.set_padding(padding);
1476 }
1477 }
1478
1479 fn advance_line(&mut self) {
1480 if self.bsource.is_empty() || self.block.is_empty() {
1481 return;
1482 }
1483 let l = match self.line {
1484 Some(l) => l + 1,
1485 None => 0,
1486 };
1487 if l < self.block.len() {
1488 self.set_position(l, self.block[l]);
1489 } else {
1490 self.pos.start = self.source().len();
1491 self.pos.stop = self.pos.start;
1492 self.pos.padding = 0;
1493 }
1494 }
1495
1496 fn advance_to_eol(&mut self) {
1497 if self.bsource.is_empty() || self.block.is_empty() {
1498 return;
1499 }
1500 self.line_offset = None;
1501 let c = self.bsource[self.pos.stop - 1];
1502 if c == b'\n' {
1503 self.pos.start = self.pos.stop - 1;
1504 } else {
1505 self.pos.start = self.pos.stop;
1506 }
1507 }
1508
1509 fn line_offset(&mut self) -> usize {
1510 if self.bsource.is_empty() || self.block.is_empty() {
1511 return 0;
1512 }
1513 if self.line_offset.is_none() {
1514 let mut v = 0;
1515 for i in self.head..self.pos.start {
1516 if self.bsource[i] == b'\t' {
1517 v += util::tab_width(v);
1518 } else {
1519 v += 1;
1520 }
1521 }
1522 v -= self.pos.padding();
1523 self.line_offset = Some(v);
1524 }
1525 self.line_offset.unwrap_or(0)
1526 }
1527
1528 fn precending_charater(&self) -> char {
1529 if self.pos.padding() != 0 {
1530 return ' ';
1531 }
1532 if self.pos.start() == 0 {
1533 return '\n';
1534 }
1535 if self.block.is_empty() {
1536 return '\n';
1537 }
1538 let first_line = &self.block[0];
1539 if self.line.unwrap_or(0) == 0 && self.pos.start() <= first_line.start() {
1540 return '\n';
1541 }
1542
1543 let mut i = self.pos.start() - 1;
1544 loop {
1545 if let Some(l) = utf8_len(self.bsource[i]) {
1546 if l == 1 {
1547 return self.bsource[i] as char;
1548 }
1549 return str::from_utf8(&self.bsource[i..i + l])
1550 .ok()
1551 .and_then(|s| s.chars().next())
1552 .unwrap_or('\u{FFFD}');
1553 }
1554 i -= 1;
1555 if i == 0 {
1556 break;
1557 }
1558 }
1559 if i == 0 {
1560 return '\n';
1561 }
1562 '\u{FFFD}'
1563 }
1564}
#[cfg(test)]
mod tests {
    use super::*;

    #[allow(unused_imports)]
    #[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
    use crate::println;

    /// A segment yields its byte range, prefixed by one space per padding unit.
    #[test]
    fn test_segment() {
        let buffer = "Hello, world!";

        let plain: Segment = (0, 5).into();
        let plain_bytes: &[u8] = &plain.bytes(buffer);
        assert_eq!(plain_bytes, b"Hello");

        let padded = Segment::new_with_padding(0, 5, 3);
        let padded_bytes: &[u8] = &padded.bytes(buffer);
        assert_eq!(padded_bytes, b"   Hello");
    }

    /// Index-backed and string-backed values expose the same bytes and text.
    #[test]
    fn test_raw() {
        let buffer = "Hello, world!";

        let index = Value::from((0, 5));
        assert_eq!(index.bytes(buffer), b"Hello");
        assert_eq!(index.str(buffer), "Hello");

        let raw_string = Value::from("Hello");
        assert_eq!(raw_string.bytes(buffer), b"Hello");

        let owned = String::from("Hello");
        let from_borrowed = Value::from(owned.as_str());
        assert_eq!(from_borrowed.str(buffer), "Hello");
    }

    /// Walks a two-line source with `BasicReader`, byte by byte and line by line.
    #[test]
    fn test_bytes_reader() {
        let buffer = "Hello, world!\nThis is a test.\n";
        let mut reader = BasicReader::new(buffer);
        assert_eq!(reader.peek_byte(), b'H');

        let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
        assert_eq!(line.as_ref(), b"Hello, world!\n");
        assert_eq!(segment.start(), 0);
        assert_eq!(segment.stop(), 14);

        reader.advance(7);
        assert_eq!(reader.peek_byte(), b'w');

        reader.advance_line();
        assert_eq!(reader.peek_byte(), b'T');

        let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
        assert_eq!(line.as_ref(), b"This is a test.\n");
        assert_eq!(segment.start(), 14);
        assert_eq!(segment.stop(), 30);

        // Advancing far past the end leaves the reader at end of source.
        reader.advance(100);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// Every operation on an empty `BasicReader` keeps reporting end of source.
    #[test]
    fn test_bytes_reader_empty() {
        let mut reader = BasicReader::new("");
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance(10);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance_line();
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// Walks two block lines with `BlockReader`; the second line has padding.
    #[test]
    fn test_block_reader() {
        let buffer = "Hello, world!\nThis is a test.\n";
        let lines = [Segment::new(0, 14), Segment::new_with_padding(14, 30, 2)];
        let mut reader = BlockReader::new(buffer, &lines);
        assert_eq!(reader.peek_byte(), b'H');

        let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
        assert_eq!(line.as_ref(), b"Hello, world!\n");
        assert_eq!(segment.start(), 0);
        assert_eq!(segment.stop(), 14);

        reader.advance(13);
        assert_eq!(reader.peek_byte(), b'\n');

        // Stepping over the newline lands on the padding of the second line.
        reader.advance(1);
        assert_eq!(reader.peek_byte(), SPACE[0]);

        let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
        assert_eq!(line.as_ref(), b"  This is a test.\n");
        assert_eq!(segment.start(), 14);
        assert_eq!(segment.stop(), 30);
        assert_eq!(segment.padding(), 2);

        // Two padding spaces plus the leading 'T' are consumed.
        reader.advance(3);
        assert_eq!(reader.peek_byte(), b'h');

        reader.advance(100);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// Every operation on an empty `BlockReader` keeps reporting end of source.
    #[test]
    fn test_block_reader_empty() {
        let lines: [Segment; 0] = [];
        let mut reader = BlockReader::new("", &lines);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance(10);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance_line();
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }
}
1710
1711