1extern crate alloc;
4
5use core::ops::Range;
6
7use memchr::memchr;
8
9use crate::util::{self, is_blank, is_space, trim_left_space, utf8_len, TinyVec};
10use alloc::borrow::Cow;
11use alloc::string::String;
12use alloc::vec::Vec;
13
14#[allow(unused_imports)]
15#[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
16use crate::println;
17
/// A single ASCII space. `SPACE[0]` is the byte emitted for each unit of
/// virtual padding by `Segment::bytes` and by the readers' `peek_byte`.
const SPACE: &[u8] = b" ";
19
20#[derive(Debug, Clone)]
29#[non_exhaustive]
30pub enum Value {
31 Index(Index),
33
34 String(String),
36}
37
38impl Value {
39 pub fn bytes<'a>(&'a self, source: &'a str) -> &'a [u8] {
41 match self {
42 Value::Index(index) => index.bytes(source),
43 Value::String(s) => s.as_bytes(),
44 }
45 }
46
47 pub fn str<'a>(&'a self, source: &'a str) -> &'a str {
49 match self {
50 Value::Index(index) => index.str(source),
51 Value::String(s) => s.as_str(),
52 }
53 }
54
55 pub fn is_empty(&self) -> bool {
57 match self {
58 Value::Index(index) => index.is_empty(),
59 Value::String(s) => s.is_empty(),
60 }
61 }
62
63 pub fn len(&self) -> usize {
65 match self {
66 Value::Index(index) => index.len(),
67 Value::String(s) => s.len(),
68 }
69 }
70}
71
72impl From<&str> for Value {
73 fn from(s: &str) -> Self {
74 Value::String(String::from(s))
75 }
76}
77
78impl From<String> for Value {
79 fn from(s: String) -> Self {
80 Value::String(s)
81 }
82}
83
84impl From<&[u8]> for Value {
85 fn from(s: &[u8]) -> Self {
86 Value::String(String::from_utf8_lossy(s).into_owned())
87 }
88}
89
90impl From<Vec<u8>> for Value {
91 fn from(s: Vec<u8>) -> Self {
92 Value::String(String::from_utf8_lossy(&s).into_owned())
93 }
94}
95
96impl From<&[char]> for Value {
97 fn from(s: &[char]) -> Self {
98 Value::String(s.iter().collect())
99 }
100}
101
102impl From<Cow<'_, [u8]>> for Value {
103 fn from(s: Cow<'_, [u8]>) -> Self {
104 Value::String(String::from_utf8_lossy(&s).into_owned())
105 }
106}
107
108impl From<Cow<'_, str>> for Value {
109 fn from(s: Cow<'_, str>) -> Self {
110 Value::String(s.into_owned())
111 }
112}
113
114impl From<&Value> for Value {
115 fn from(v: &Value) -> Self {
116 match v {
117 Value::Index(index) => Value::Index(*index),
118 Value::String(s) => Value::String(s.clone()),
119 }
120 }
121}
122
123impl From<(usize, usize)> for Value {
124 fn from((start, stop): (usize, usize)) -> Self {
125 Value::Index(Index::new(start, stop))
126 }
127}
128
129impl From<Segment> for Value {
130 fn from(segment: Segment) -> Self {
131 Value::Index(Index::new(segment.start(), segment.stop()))
132 }
133}
/// A half-open byte range `[start, stop)` into a source string.
///
/// An `Index` stores only the two offsets; the source text is supplied to
/// [`Index::bytes`] / [`Index::str`] when the contents are needed.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Index {
    /// Offset of the first byte of the range.
    start: usize,

    /// Offset one past the last byte of the range.
    stop: usize,
}

impl Index {
    /// Creates an index covering `start..stop`.
    pub fn new(start: usize, stop: usize) -> Self {
        Index { start, stop }
    }

    /// Returns the start offset.
    #[inline(always)]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the stop offset (exclusive).
    #[inline(always)]
    pub fn stop(&self) -> usize {
        self.stop
    }

    /// Returns the bytes of `source` covered by this index.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds for `source` or if `start > stop`.
    #[inline(always)]
    pub fn bytes<'a>(&self, source: &'a str) -> &'a [u8] {
        &source.as_bytes()[self.start..self.stop]
    }

    /// Returns the part of `source` covered by this index without any checks.
    ///
    /// The range must lie within `source`, satisfy `start <= stop`, and both
    /// ends must fall on UTF-8 character boundaries.
    #[inline(always)]
    pub fn str<'a>(&self, source: &'a str) -> &'a str {
        // SAFETY: relies on the index having been produced for this `source`
        // so that the range is in bounds and on character boundaries.
        // NOTE(review): nothing in this type enforces that invariant — confirm
        // that every producer of `Index` upholds it.
        unsafe { source.get_unchecked(self.start..self.stop) }
    }

    /// Returns `true` if the range covers no bytes (`start >= stop`).
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.start >= self.stop
    }

    /// Returns a copy of this index with `start` replaced by `v`.
    #[inline(always)]
    pub fn with_start(&self, v: usize) -> Index {
        Index::new(v, self.stop)
    }

    /// Returns a copy of this index with `stop` replaced by `v`.
    #[inline(always)]
    pub fn with_stop(&self, v: usize) -> Index {
        Index::new(self.start, v)
    }

    /// Returns the number of bytes covered by the range.
    ///
    /// An inverted range (`start > stop`) is reported as empty by
    /// [`Index::is_empty`], so its length is `0` rather than an arithmetic
    /// overflow.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.stop.saturating_sub(self.start)
    }
}
203
204impl From<Index> for Value {
205 fn from(index: Index) -> Self {
206 Value::Index(index)
207 }
208}
209
210impl From<(usize, usize)> for Index {
211 fn from((start, stop): (usize, usize)) -> Self {
212 Index::new(start, stop)
213 }
214}
215
216impl From<Segment> for Index {
217 fn from(segment: Segment) -> Self {
218 Index::new(segment.start(), segment.stop())
219 }
220}
221
222#[derive(Debug, Clone, Default)]
231#[non_exhaustive]
232pub enum MultilineValue {
233 #[default]
234 Empty,
235 Indices(TinyVec<Index>),
236 String(String),
237}
238
239impl MultilineValue {
240 pub fn from_index(index: Index) -> Self {
242 MultilineValue::Indices(TinyVec::from_single(index))
243 }
244
245 pub fn from_indices(indices: Vec<Index>) -> Self {
247 MultilineValue::Indices(TinyVec::from_vec(indices))
248 }
249
250 pub fn from_string(s: String) -> Self {
252 MultilineValue::String(s)
253 }
254
255 pub fn str<'a>(&'a self, source: &'a str) -> Cow<'a, str> {
259 match self {
260 MultilineValue::Empty => Cow::Borrowed(""),
261 MultilineValue::Indices(indices) => {
262 let first = indices.get(0);
263 let second = indices.get(1);
264 if let Some(f) = first {
265 if second.is_none() {
266 return Cow::Borrowed(f.str(source));
267 }
268 } else {
269 return Cow::Borrowed("");
270 }
271 let mut result = String::new();
272 result.push_str(first.unwrap().str(source));
273 let b = second.unwrap().bytes(source);
274 result.push_str(unsafe { core::str::from_utf8_unchecked(trim_left_space(b)) });
275 for v in indices.iter().skip(2) {
276 let b = v.bytes(source);
277 result.push_str(unsafe { core::str::from_utf8_unchecked(trim_left_space(b)) });
278 }
279 Cow::Owned(result)
280 }
281 MultilineValue::String(s) => Cow::Borrowed(s.as_str()),
282 }
283 }
284
285 pub fn bytes<'a>(&'a self, source: &'a str) -> Cow<'a, [u8]> {
289 match self {
290 MultilineValue::Empty => Cow::Borrowed(&[]),
291 MultilineValue::Indices(indices) => {
292 let first = indices.get(0);
293 let second = indices.get(1);
294 if let Some(f) = first {
295 if second.is_none() {
296 return Cow::Borrowed(f.bytes(source));
297 }
298 } else {
299 return Cow::Borrowed(&[]);
300 }
301 let mut result = Vec::new();
302 result.extend_from_slice(first.unwrap().bytes(source));
303 result.extend_from_slice(trim_left_space(second.unwrap().bytes(source)));
304 for v in indices.iter().skip(2) {
305 result.extend_from_slice(trim_left_space(v.bytes(source)));
306 }
307 Cow::Owned(result)
308 }
309 MultilineValue::String(s) => Cow::Borrowed(s.as_bytes()),
310 }
311 }
312}
313
314impl From<String> for MultilineValue {
315 fn from(s: String) -> Self {
316 MultilineValue::String(s)
317 }
318}
319
320impl From<&String> for MultilineValue {
321 fn from(s: &String) -> Self {
322 MultilineValue::String(s.clone())
323 }
324}
325
326impl From<&str> for MultilineValue {
327 fn from(s: &str) -> Self {
328 MultilineValue::String(String::from(s))
329 }
330}
331
332impl From<&[u8]> for MultilineValue {
333 fn from(s: &[u8]) -> Self {
334 MultilineValue::String(String::from_utf8_lossy(s).into_owned())
335 }
336}
337
338impl From<Cow<'_, str>> for MultilineValue {
339 fn from(s: Cow<'_, str>) -> Self {
340 MultilineValue::String(s.into_owned())
341 }
342}
343
344impl From<Cow<'_, [u8]>> for MultilineValue {
345 fn from(s: Cow<'_, [u8]>) -> Self {
346 MultilineValue::String(String::from_utf8_lossy(&s).into_owned())
347 }
348}
349
350impl From<Value> for MultilineValue {
351 fn from(v: Value) -> Self {
352 match v {
353 Value::Index(index) => MultilineValue::Indices(TinyVec::from_single(index)),
354 Value::String(s) => MultilineValue::String(s),
355 }
356 }
357}
358
359impl From<TinyVec<Index>> for MultilineValue {
360 fn from(indices: TinyVec<Index>) -> Self {
361 MultilineValue::Indices(indices)
362 }
363}
364
365#[derive(Debug, Clone, Default)]
378#[non_exhaustive]
379pub enum Lines {
380 #[default]
381 Empty,
382 Segments(Vec<Segment>),
383 String(String),
384}
385
386impl Lines {
387 pub fn from_segments(segments: Vec<Segment>) -> Self {
389 Lines::Segments(segments)
390 }
391
392 pub fn from_string(s: String) -> Self {
394 Lines::String(s)
395 }
396
397 pub fn iter<'a>(&'a self, source: &'a str) -> impl Iterator<Item = Cow<'a, str>> {
399 LinesIter::new(
400 match self {
401 Lines::Empty => LinesIterState::Empty,
402 Lines::Segments(segments) => LinesIterState::Segments(segments.iter()),
403 Lines::String(s) => LinesIterState::String(s.split_inclusive('\n')),
404 },
405 source,
406 )
407 }
408}
409
410impl From<String> for Lines {
411 fn from(s: String) -> Self {
412 Lines::String(s)
413 }
414}
415
416impl From<&String> for Lines {
417 fn from(s: &String) -> Self {
418 Lines::String(s.clone())
419 }
420}
421
422impl From<&str> for Lines {
423 fn from(s: &str) -> Self {
424 Lines::String(String::from(s))
425 }
426}
427
428impl From<&[u8]> for Lines {
429 fn from(s: &[u8]) -> Self {
430 Lines::String(String::from_utf8_lossy(s).into_owned())
431 }
432}
433
434impl From<Vec<Segment>> for Lines {
435 fn from(segments: Vec<Segment>) -> Self {
436 Lines::Segments(segments)
437 }
438}
439
440impl From<&[Segment]> for Lines {
441 fn from(segments: &[Segment]) -> Self {
442 Lines::Segments(segments.to_vec())
443 }
444}
445
/// Per-variant iteration state behind `Lines::iter`.
enum LinesIterState<'a> {
    /// Nothing to yield.
    Empty,
    /// Walks the stored segments; each is resolved against the source when yielded.
    Segments(core::slice::Iter<'a, Segment>),
    /// Walks an owned string split after each `'\n'`.
    String(core::str::SplitInclusive<'a, char>),
}
451
452struct LinesIter<'a> {
454 state: LinesIterState<'a>,
455 source: &'a str,
456}
457
458impl<'a> LinesIter<'a> {
459 pub fn new(state: LinesIterState<'a>, source: &'a str) -> Self {
461 LinesIter { state, source }
462 }
463}
464
465impl<'a> Iterator for LinesIter<'a> {
466 type Item = Cow<'a, str>;
467
468 #[inline(always)]
469 fn next(&mut self) -> Option<Self::Item> {
470 match &mut self.state {
471 LinesIterState::Empty => None,
472 LinesIterState::Segments(iter) => iter.next().map(|segment| segment.str(self.source)),
473 LinesIterState::String(iter) => iter.next().map(Cow::Borrowed),
474 }
475 }
476}
477
/// A block of text: a slice of [`Segment`]s, one per line.
pub type Block = [Segment];
486
487fn binary_search_block_pos(block: &Block, pos: usize) -> Option<usize> {
488 let mut left = 0;
489 let mut right = block.len();
490 while left < right {
491 let mid = (left + right) / 2;
492 if block[mid].start() <= pos && pos < block[mid].stop() {
493 return Some(mid);
494 }
495 if pos < block[mid].start() {
496 right = mid;
497 } else {
498 left = mid + 1;
499 }
500 }
501 None
502}
503
/// Extension methods for [`Block`].
pub trait BlockExt {
    /// Converts the block into a [`MultilineValue`].
    fn to_values(&self) -> MultilineValue;
}
509
510impl BlockExt for Block {
511 fn to_values(&self) -> MultilineValue {
512 let first = self.first();
513 let second = self.get(1);
514 if let Some(f) = first {
515 if second.is_none() {
516 return MultilineValue::from_index((f.start(), f.stop()).into());
517 }
518 } else {
519 return MultilineValue::default();
520 }
521 let mut result = Vec::with_capacity(self.len());
522 for v in self.iter() {
523 result.push((v.start(), v.stop()).into());
524 }
525 MultilineValue::from_indices(result)
526 }
527}
528
529pub(crate) fn block_to_values(i: impl IntoIterator<Item = Segment>) -> MultilineValue {
530 let mut b = i.into_iter();
531 let first = b.next();
532 let second = b.next();
533 if let Some(f) = first {
534 if second.is_none() {
535 return MultilineValue::from_index(f.into());
536 }
537 } else {
538 return MultilineValue::default();
539 }
540 let mut result = Vec::with_capacity(2 + b.size_hint().0);
541 result.push(first.unwrap().into());
542 result.push(second.unwrap().into());
543 for segment in b {
544 result.push(segment.into());
545 }
546 MultilineValue::from_indices(result)
547}
548
549#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
556pub struct Segment {
557 start: usize,
558
559 stop: usize,
560
561 padding: u8,
562
563 force_newline: bool,
564}
565
566impl Segment {
567 pub fn new(start: usize, stop: usize) -> Self {
569 Segment {
570 start,
571 stop,
572 padding: 0,
573 force_newline: false,
574 }
575 }
576
577 pub fn new_with_padding(start: usize, stop: usize, padding: usize) -> Self {
579 Segment {
580 start,
581 stop,
582 padding: padding as u8,
583 force_newline: false,
584 }
585 }
586
587 #[inline(always)]
589 pub fn start(&self) -> usize {
590 self.start
591 }
592
593 #[inline(always)]
595 pub fn stop(&self) -> usize {
596 self.stop
597 }
598
599 #[inline(always)]
604 pub fn padding(&self) -> usize {
605 self.padding as usize
606 }
607
608 #[inline(always)]
610 pub fn force_newline(&self) -> bool {
611 self.force_newline
612 }
613
614 pub fn bytes<'a>(&self, source: &'a str) -> Cow<'a, [u8]> {
616 if self.padding == 0
617 && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
618 {
619 Cow::Borrowed(&source.as_bytes()[self.start..self.stop])
620 } else {
621 let mut result = Vec::with_capacity(self.padding() + self.stop - self.start + 1);
622 result.extend(core::iter::repeat_n(SPACE[0], self.padding()));
623 result.extend_from_slice(&source.as_bytes()[self.start..self.stop]);
624 if self.force_newline && !result.is_empty() && *result.last().unwrap() != b'\n' {
625 result.push(b'\n');
626 }
627 Cow::Owned(result)
628 }
629 }
630
631 pub fn str<'a>(&self, source: &'a str) -> Cow<'a, str> {
636 if self.padding == 0
637 && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
638 {
639 unsafe { Cow::Borrowed(source.get_unchecked(self.start..self.stop)) }
640 } else {
641 let mut result = String::with_capacity(self.padding() + self.stop - self.start + 1);
642 result.extend(core::iter::repeat_n(' ', self.padding()));
643 unsafe { result.push_str(source.get_unchecked(self.start..self.stop)) };
644 if self.force_newline && !result.is_empty() && result.as_bytes().last() != Some(&b'\n')
645 {
646 result.push('\n');
647 }
648 Cow::Owned(result)
649 }
650 }
651
652 #[inline(always)]
654 pub fn len(&self) -> usize {
655 self.stop - self.start + self.padding()
656 }
657
658 pub fn between(&self, other: Segment) -> Segment {
660 if self.stop != other.stop {
661 panic!("invalid state");
662 }
663 Segment::new_with_padding(
664 self.start,
665 other.start,
666 (self.padding - other.padding) as usize,
667 )
668 }
669
670 #[inline(always)]
672 pub fn is_empty(&self) -> bool {
673 self.start >= self.stop && self.padding == 0
674 }
675
676 pub fn is_blank(&self, source: &str) -> bool {
678 let v = &source.as_bytes()[self.start..self.stop];
679 is_blank(v)
680 }
681
682 pub fn trim_right_space(&self, source: &str) -> Segment {
684 let v = &source.as_bytes()[self.start..self.stop];
685 let l = util::trim_right_space_length(v);
686 if l == v.len() {
687 Segment::new(self.start, self.start)
688 } else {
689 Segment::new_with_padding(self.start, self.stop - l, self.padding as usize)
690 }
691 }
692
693 pub fn trim_left_space(&self, source: &str) -> Segment {
695 let v = &source.as_bytes()[self.start..self.stop];
696 let l = util::trim_left_space_length(v);
697 Segment::new(self.start + l, self.stop)
698 }
699
700 pub fn trim_left_space_width(&self, mut width: isize, source: &str) -> Segment {
703 let mut padding = self.padding as isize;
704 while width > 0 && padding > 0 {
705 width -= 1;
706 padding -= 1;
707 }
708 if width == 0 {
709 return Segment::new_with_padding(self.start, self.stop, padding as usize);
710 }
711 let v = &source.as_bytes()[self.start..self.stop];
712 let mut start = self.start;
713 for &c in v {
714 if start >= self.stop - 1 || width == 0 {
715 break;
716 }
717 if c == b' ' {
718 width -= 1;
719 } else if c == b'\t' {
720 width -= 4;
721 } else {
722 break;
723 }
724 start += 1;
725 }
726 if width < 0 {
727 padding = -width;
728 }
729 Segment::new_with_padding(start, self.stop, padding as usize)
730 }
731
732 #[inline(always)]
734 pub fn with_start(&self, v: usize) -> Segment {
735 Segment::new_with_padding(v, self.stop, self.padding as usize)
736 }
737
738 #[inline(always)]
740 pub fn with_stop(&self, v: usize) -> Segment {
741 Segment::new_with_padding(self.start, v, self.padding as usize)
742 }
743
744 #[inline(always)]
746 pub fn with_padding(&self, v: usize) -> Segment {
747 Segment::new_with_padding(self.start, self.stop, v)
748 }
749
750 #[inline(always)]
752 pub fn with_force_newline(&self, v: bool) -> Segment {
753 Segment {
754 start: self.start,
755 stop: self.stop,
756 padding: self.padding,
757 force_newline: v,
758 }
759 }
760
761 #[inline(always)]
763 pub fn to_index(&self) -> Index {
764 Index::new(self.start, self.stop)
765 }
766}
767
768impl From<(usize, usize)> for Segment {
769 fn from((start, stop): (usize, usize)) -> Self {
770 Segment::new(start, stop)
771 }
772}
773
774impl From<(usize, usize, usize)> for Segment {
775 fn from((start, stop, padding): (usize, usize, usize)) -> Self {
776 Segment::new_with_padding(start, stop, padding)
777 }
778}
779
780impl From<Index> for Segment {
781 fn from(index: Index) -> Self {
782 Segment::new(index.start(), index.stop())
783 }
784}
785
786impl From<Segment> for Range<usize> {
787 fn from(segment: Segment) -> Self {
788 segment.start()..segment.stop()
789 }
790}
791
/// Sentinel byte returned by `Reader::peek_byte` when there is nothing left
/// to read. `0xff` never occurs in valid UTF-8 text.
pub const EOS: u8 = 0xff;
800
801pub trait Reader<'a> {
803 fn source(&self) -> &'a str;
805
806 fn position(&self) -> (usize, Segment);
808
809 fn reset_position(&mut self);
811
812 fn set_position(&mut self, line: usize, pos: Segment);
814
815 fn set_padding(&mut self, padding: usize);
817
818 fn peek_byte(&self) -> u8;
821
822 fn peek_line_segment(&self) -> Option<Segment>;
825
826 fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)>;
829
830 fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)>;
833
834 fn advance(&mut self, n: usize);
836
837 fn advance_and_set_padding(&mut self, n: usize, padding: usize);
840
841 fn advance_line(&mut self);
843
844 fn advance_to_eol(&mut self);
848
849 fn line_offset(&mut self) -> usize;
851
852 fn precending_charater(&self) -> char;
854
855 fn skip_blank_lines(&mut self) -> Option<(Cow<'a, [u8]>, Segment)> {
858 loop {
859 match self.peek_line_bytes() {
860 None => return None,
861 Some((line, seg)) => {
862 if is_blank(&line) {
863 self.advance_line();
864 continue;
865 }
866 return Some((line, seg));
867 }
868 }
869 }
870 }
871
872 fn skip_while<F>(&mut self, mut f: F) -> usize
874 where
875 F: FnMut(u8) -> bool,
876 {
877 let mut i = 0usize;
878 loop {
879 let b = self.peek_byte();
880 if b == EOS {
881 break;
882 }
883 if f(b) {
884 i += 1;
885 self.advance(1);
886 continue;
887 }
888 break;
889 }
890 i
891 }
892
893 fn skip_spaces(&mut self) -> usize {
895 self.skip_while(is_space)
896 }
897}
898
899pub struct BasicReader<'a> {
903 source: &'a str,
904 bsource: &'a [u8],
905 source_length: usize,
906 line: Option<usize>,
907 pos: Segment,
908 head: usize,
909 line_offset: Option<usize>,
910}
911
912impl<'a> BasicReader<'a> {
913 pub fn new(source: &'a str) -> Self {
915 let bsource: &[u8] = source.as_bytes();
916 let source_length = bsource.len();
917 let mut b = BasicReader {
918 source,
919 bsource,
920 source_length,
921 line: None,
922 pos: Segment::new(0, 0),
923 head: 0,
924 line_offset: None,
925 };
926 b.reset_position();
927 b
928 }
929
930 pub unsafe fn new_unchecked(source: &'a [u8]) -> Self {
935 Self::new(core::str::from_utf8_unchecked(source))
936 }
937}
938
939impl<'a> Reader<'a> for BasicReader<'a> {
940 fn source(&self) -> &'a str {
941 self.source
942 }
943
944 fn position(&self) -> (usize, Segment) {
945 (self.line.unwrap_or(0), self.pos)
946 }
947
948 fn reset_position(&mut self) {
949 self.line = None;
950 self.head = 0;
951 self.line_offset = None;
952 self.advance_line();
953 }
954
955 fn set_position(&mut self, line: usize, pos: Segment) {
956 self.line = Some(line);
957 self.pos = pos;
958 self.head = pos.start;
959 self.line_offset = None;
960 }
961
962 fn set_padding(&mut self, padding: usize) {
963 self.pos.padding = padding as u8;
964 }
965
966 fn peek_byte(&self) -> u8 {
967 if self.source_length == 0 {
968 return EOS;
969 }
970 if self.pos.padding() != 0 {
971 return SPACE[0];
972 }
973 if self.pos.start() < self.source_length {
974 return self.bsource[self.pos.start()];
975 }
976 EOS
977 }
978
979 fn peek_line_segment(&self) -> Option<Segment> {
980 if self.source_length == 0 {
981 return None;
982 }
983 if self.pos.start() < self.source_length {
984 return Some(self.pos);
985 }
986 None
987 }
988
989 fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
990 if self.source_length == 0 {
991 return None;
992 }
993 if self.pos.start() < self.source_length {
994 return Some((self.pos.bytes(self.source), self.pos));
995 }
996 None
997 }
998
999 fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
1000 if self.source_length == 0 {
1001 return None;
1002 }
1003 if self.pos.start() < self.source_length {
1004 return Some((self.pos.str(self.source), self.pos));
1005 }
1006 None
1007 }
1008
1009 fn advance(&mut self, n: usize) {
1010 if self.source_length == 0 {
1011 return;
1012 }
1013
1014 self.line_offset = None;
1015 if n < self.pos.len() && self.pos.padding() == 0 {
1016 self.pos.start += n;
1017 return;
1018 }
1019 let mut n = n;
1020 while n > 0 && self.pos.start < self.source_length {
1021 if self.pos.padding != 0 {
1022 self.pos.padding -= 1;
1023 n -= 1;
1024 continue;
1025 }
1026 if self.bsource[self.pos.start] == b'\n' {
1027 self.advance_line();
1028 n -= 1;
1029 continue;
1030 }
1031
1032 self.pos.start += 1;
1033 n -= 1;
1034 }
1035 }
1036
1037 fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
1038 self.advance(n);
1039 if padding > self.pos.padding() {
1040 self.set_padding(padding);
1041 }
1042 }
1043
1044 fn advance_line(&mut self) {
1045 self.line_offset = None;
1046 if self.source_length == 0 || self.pos.start >= self.source_length {
1047 return;
1048 }
1049
1050 if self.line.is_some() {
1051 self.pos.start = self.pos.stop;
1052 if self.pos.start >= self.source_length {
1053 return;
1054 }
1055 self.pos.stop = self.source_length;
1056 if self.bsource[self.pos.start] != b'\n' {
1057 if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
1058 self.pos.stop = self.pos.start + i + 1;
1059 }
1060 } else {
1061 self.pos.stop = self.pos.start + 1;
1062 }
1063 self.line = Some(self.line.unwrap() + 1);
1064 } else {
1065 if let Some(i) = memchr(b'\n', self.bsource) {
1066 self.pos = (0, i + 1).into();
1067 } else {
1068 self.pos = (0, self.source_length).into();
1069 }
1070 self.line = Some(0);
1071 }
1072 self.head = self.pos.start;
1073 self.pos.padding = 0;
1074 }
1075
1076 fn advance_to_eol(&mut self) {
1077 if self.source_length == 0 || self.pos.start >= self.source_length {
1078 return;
1079 }
1080
1081 self.line_offset = None;
1082 if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
1083 self.pos.start += i;
1084 } else {
1085 self.pos.start = self.source_length;
1086 }
1087 self.pos.padding = 0;
1088 }
1089
1090 fn line_offset(&mut self) -> usize {
1091 if self.line_offset.is_none() {
1092 let mut v = 0;
1093 for i in self.head..self.pos.start {
1094 if self.bsource[i] == b'\t' {
1095 v += util::tab_width(v);
1096 } else {
1097 v += 1;
1098 }
1099 }
1100 v -= self.pos.padding();
1101 self.line_offset = Some(v);
1102 }
1103 self.line_offset.unwrap_or(0)
1104 }
1105
1106 fn precending_charater(&self) -> char {
1107 if self.pos.padding() != 0 {
1108 return ' ';
1109 }
1110 if self.pos.start() == 0 {
1111 return '\n';
1112 }
1113 let mut i = self.pos.start() - 1;
1114 loop {
1115 if let Some(l) = utf8_len(self.bsource[i]) {
1116 if l == 1 {
1117 return self.bsource[i] as char;
1118 }
1119 return str::from_utf8(&self.bsource[i..i + l])
1120 .ok()
1121 .and_then(|s| s.chars().next())
1122 .unwrap_or('\u{FFFD}');
1123 }
1124 i -= 1;
1125 if i == 0 {
1126 break;
1127 }
1128 }
1129 '\u{FFFD}'
1130 }
1131}
1132
1133pub struct BlockReader<'a> {
1139 source: &'a str,
1140 bsource: &'a [u8],
1141 block: &'a Block,
1142 line: Option<usize>,
1143 pos: Segment,
1144 head: usize,
1145 last: usize,
1146 line_offset: Option<usize>,
1147}
1148
1149impl<'a> BlockReader<'a> {
1150 pub fn new(source: &'a str, block: &'a Block) -> Self {
1152 let mut b = BlockReader {
1153 source,
1154 bsource: source.as_bytes(),
1155 block,
1156 line: None,
1157 pos: Segment::new(0, 0),
1158 head: 0,
1159 last: 0,
1160 line_offset: None,
1161 };
1162 b.reset(block);
1163 b
1164 }
1165
1166 pub unsafe fn new_unchecked(source: &'a [u8], block: &'a Block) -> Self {
1171 Self::new(core::str::from_utf8_unchecked(source), block)
1172 }
1173
1174 pub fn reset(&mut self, lines: &'a Block) {
1176 self.block = lines;
1177 self.reset_position();
1178 }
1179
1180 pub fn between_current(&mut self, line: usize, pos: Segment) -> MultilineValue {
1183 if line == self.line.unwrap_or(0) {
1184 let seg = self.block[line];
1185 if pos.start() >= seg.start() && self.pos.start() <= seg.stop() {
1186 return block_to_values(BetweenBlockIterator::single(
1187 pos.start()..self.pos.start(),
1188 ));
1189 }
1190 }
1191 block_to_values(BetweenBlockIterator::multi(
1192 BlockReader {
1193 source: self.source,
1194 bsource: self.bsource,
1195 block: self.block,
1196 line: self.line,
1197 pos: self.pos,
1198 head: self.head,
1199 last: self.last,
1200 line_offset: self.line_offset,
1201 },
1202 line,
1203 pos,
1204 ))
1205 }
1206
1207 pub fn between(&self, range: Range<usize>) -> MultilineValue {
1209 let from_line = binary_search_block_pos(self.block, range.start).unwrap_or(0);
1210 let mut from_pos = self.block[from_line];
1211 if range.start >= from_pos.start() && range.end <= from_pos.stop() {
1212 return block_to_values(BetweenBlockIterator::single(range));
1213 }
1214 let to_line =
1215 binary_search_block_pos(self.block, range.end).unwrap_or(self.block.len() - 1);
1216 let mut to_pos = self.block[to_line];
1217 to_pos.start = range.end;
1218 from_pos.start = range.start;
1219
1220 block_to_values(BetweenBlockIterator::multi(
1221 BlockReader {
1222 source: self.source,
1223 bsource: self.bsource,
1224 block: self.block,
1225 line: Some(to_line),
1226 pos: to_pos,
1227 head: 0,
1228 last: 0,
1229 line_offset: None,
1230 },
1231 from_line,
1232 from_pos,
1233 ))
1234 }
1235}
1236
/// State of a `BetweenBlockIterator` that spans more than one line.
struct MultilineBetweenBlock<'a> {
    /// Reader used to walk the lines; it is moved to the start position on creation.
    reader: BlockReader<'a>,
    /// Line on which the extracted text starts.
    start_line: usize,
    /// Position at which the extracted text starts.
    start_pos: Segment,
    /// Line the reader was on when the iterator was created (where the text ends).
    current_line: usize,
    /// Position the reader was at when the iterator was created (where the text ends).
    current_pos: Segment,
}
1244
1245struct BetweenBlockIterator<'a> {
1246 multi: Option<MultilineBetweenBlock<'a>>,
1247 single: Option<Range<usize>>,
1248 done: bool,
1249}
1250
1251impl<'a> BetweenBlockIterator<'a> {
1252 fn multi(mut reader: BlockReader<'a>, line: usize, pos: Segment) -> BetweenBlockIterator<'a> {
1253 let (current_line, current_pos) = reader.position();
1254 reader.set_position(line, pos);
1255 BetweenBlockIterator {
1256 multi: Some(MultilineBetweenBlock {
1257 reader,
1258 start_line: line,
1259 start_pos: pos,
1260 current_line,
1261 current_pos,
1262 }),
1263 single: None,
1264 done: false,
1265 }
1266 }
1267
1268 fn single(range: Range<usize>) -> BetweenBlockIterator<'a> {
1269 BetweenBlockIterator {
1270 multi: None,
1271 single: Some(range),
1272 done: false,
1273 }
1274 }
1275}
1276
1277impl<'a> Iterator for BetweenBlockIterator<'a> {
1278 type Item = Segment;
1279
1280 fn next(&mut self) -> Option<Self::Item> {
1281 if self.done {
1282 return None;
1283 }
1284 if let Some(s) = &self.single {
1285 self.done = true;
1286 return Some((s.start, s.end).into());
1287 }
1288 if let Some(m) = &mut self.multi {
1289 let (ln, _) = m.reader.position();
1290 let (_, segment) = m.reader.peek_line_bytes()?;
1291 let start = if ln == m.start_line {
1292 m.start_pos.start()
1293 } else {
1294 segment.start()
1295 };
1296 let stop = if ln == m.current_line {
1297 m.current_pos.start()
1298 } else {
1299 segment.stop()
1300 };
1301 let seg = Segment::new(start, stop);
1302 if ln == m.current_line {
1303 m.reader.advance(stop - start);
1304 self.done = true;
1305 }
1306 m.reader.advance_line();
1307 return Some(seg);
1308 }
1309 None
1310 }
1311}
1312
1313impl<'a> Reader<'a> for BlockReader<'a> {
1314 fn source(&self) -> &'a str {
1315 self.source
1316 }
1317
1318 fn position(&self) -> (usize, Segment) {
1319 (self.line.unwrap_or(0), self.pos)
1320 }
1321
1322 fn reset_position(&mut self) {
1323 self.line = None;
1324 self.head = 0;
1325 self.last = 0;
1326 self.line_offset = None;
1327 self.pos.start = 0;
1328 self.pos.stop = 0;
1329 self.pos.padding = 0;
1330 self.pos.force_newline = false;
1331 if let Some(l) = self.block.last() {
1332 self.last = l.stop;
1333 }
1334 self.advance_line();
1335 }
1336
1337 fn set_position(&mut self, line: usize, pos: Segment) {
1338 self.line_offset = None;
1339 self.line = Some(line);
1340 self.pos = pos;
1341 if line < self.block.len() {
1342 self.head = self.block[line].start;
1343 }
1344 }
1345
1346 fn set_padding(&mut self, padding: usize) {
1347 self.line_offset = None;
1348 self.pos.padding = padding as u8;
1349 }
1350
1351 fn peek_byte(&self) -> u8 {
1352 if self.bsource.is_empty() || self.block.is_empty() {
1353 return EOS;
1354 }
1355 if self.pos.padding() != 0 {
1356 return SPACE[0];
1357 }
1358 let l = self.line.unwrap();
1359 if self.pos.is_empty() {
1360 if l < self.block.len() - 1 {
1361 let next = &self.block[l + 1];
1362 if next.padding() != 0 {
1363 return SPACE[0];
1364 }
1365 if next.start < self.bsource.len() {
1366 return self.bsource[next.start];
1367 }
1368 }
1369 return EOS;
1370 } else if self.pos.start < self.bsource.len() {
1371 return self.bsource[self.pos.start];
1372 }
1373 EOS
1374 }
1375
1376 fn peek_line_segment(&self) -> Option<Segment> {
1377 if self.bsource.is_empty() || self.block.is_empty() {
1378 return None;
1379 }
1380 let l = self.line.unwrap();
1381 if self.pos.is_empty() {
1382 if l < self.block.len() - 1 {
1383 let s = self.block[l + 1].start;
1384 if s < self.bsource.len() {
1385 return Some(self.block[l + 1]);
1386 }
1387 }
1388 return None;
1389 } else if self.pos.start < self.bsource.len() {
1390 return Some(self.pos);
1391 }
1392 None
1393 }
1394
1395 fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
1396 if self.bsource.is_empty() || self.block.is_empty() {
1397 return None;
1398 }
1399 let l = self.line.unwrap();
1400 if self.pos.is_empty() {
1401 if l < self.block.len() - 1 {
1402 let s = self.block[l + 1].start;
1403 if s < self.bsource.len() {
1404 return Some((self.block[l + 1].bytes(self.source), self.block[l + 1]));
1405 }
1406 }
1407 return None;
1408 } else if self.pos.start < self.bsource.len() {
1409 return Some((self.pos.bytes(self.source), self.pos));
1410 }
1411 None
1412 }
1413
1414 fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
1415 if self.bsource.is_empty() || self.block.is_empty() {
1416 return None;
1417 }
1418 let l = self.line.unwrap();
1419 if self.pos.is_empty() {
1420 if l < self.block.len() - 1 {
1421 let s = self.block[l + 1].start;
1422 if s < self.bsource.len() {
1423 return Some((self.block[l + 1].str(self.source), self.block[l + 1]));
1424 }
1425 }
1426 return None;
1427 } else if self.pos.start < self.bsource.len() {
1428 return Some((self.pos.str(self.source), self.pos));
1429 }
1430 None
1431 }
1432
1433 fn advance(&mut self, n: usize) {
1434 if self.bsource.is_empty() || self.block.is_empty() {
1435 return;
1436 }
1437 self.line_offset = None;
1438 if n < self.pos.len() && self.pos.padding() == 0 {
1439 self.pos.start += n;
1440 return;
1441 }
1442 let mut n = n;
1443 while n > 0 && self.pos.start < self.last {
1444 if self.pos.padding != 0 {
1445 self.pos.padding -= 1;
1446 n -= 1;
1447 continue;
1448 }
1449 if self.pos.start >= self.pos.stop - 1 && self.pos.stop < self.last {
1450 self.advance_line();
1451 n -= 1;
1452 continue;
1453 }
1454
1455 self.pos.start += 1;
1456 n -= 1;
1457 }
1458 }
1459
1460 fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
1461 self.advance(n);
1462 if padding > self.pos.padding() {
1463 self.set_padding(padding);
1464 }
1465 }
1466
1467 fn advance_line(&mut self) {
1468 if self.bsource.is_empty() || self.block.is_empty() {
1469 return;
1470 }
1471 let l = match self.line {
1472 Some(l) => l + 1,
1473 None => 0,
1474 };
1475 if l < self.block.len() {
1476 self.set_position(l, self.block[l]);
1477 } else {
1478 self.pos.start = self.source().len();
1479 self.pos.stop = self.pos.start;
1480 self.pos.padding = 0;
1481 }
1482 }
1483
1484 fn advance_to_eol(&mut self) {
1485 if self.bsource.is_empty() || self.block.is_empty() {
1486 return;
1487 }
1488 self.line_offset = None;
1489 let c = self.bsource[self.pos.stop - 1];
1490 if c == b'\n' {
1491 self.pos.start = self.pos.stop - 1;
1492 } else {
1493 self.pos.start = self.pos.stop;
1494 }
1495 }
1496
1497 fn line_offset(&mut self) -> usize {
1498 if self.bsource.is_empty() || self.block.is_empty() {
1499 return 0;
1500 }
1501 if self.line_offset.is_none() {
1502 let mut v = 0;
1503 for i in self.head..self.pos.start {
1504 if self.bsource[i] == b'\t' {
1505 v += util::tab_width(v);
1506 } else {
1507 v += 1;
1508 }
1509 }
1510 v -= self.pos.padding();
1511 self.line_offset = Some(v);
1512 }
1513 self.line_offset.unwrap_or(0)
1514 }
1515
1516 fn precending_charater(&self) -> char {
1517 if self.pos.padding() != 0 {
1518 return ' ';
1519 }
1520 if self.pos.start() == 0 {
1521 return '\n';
1522 }
1523 if self.block.is_empty() {
1524 return '\n';
1525 }
1526 let first_line = &self.block[0];
1527 if self.line.unwrap_or(0) == 0 && self.pos.start() <= first_line.start() {
1528 return '\n';
1529 }
1530
1531 let mut i = self.pos.start() - 1;
1532 loop {
1533 if let Some(l) = utf8_len(self.bsource[i]) {
1534 if l == 1 {
1535 return self.bsource[i] as char;
1536 }
1537 return str::from_utf8(&self.bsource[i..i + l])
1538 .ok()
1539 .and_then(|s| s.chars().next())
1540 .unwrap_or('\u{FFFD}');
1541 }
1542 i -= 1;
1543 if i == 0 {
1544 break;
1545 }
1546 }
1547 if i == 0 {
1548 return '\n';
1549 }
1550 '\u{FFFD}'
1551 }
1552}
#[cfg(test)]
mod tests {
    use super::*;

    #[allow(unused_imports)]
    #[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
    use crate::println;

    /// A segment resolves to its slice of the buffer; padding shows up as
    /// one leading space per padding unit.
    #[test]
    fn test_segment() {
        let buffer = "Hello, world!";

        let plain: Segment = (0, 5).into();
        let plain_bytes: &[u8] = &plain.bytes(buffer);
        assert_eq!(plain_bytes, b"Hello");

        let padded = Segment::new_with_padding(0, 5, 3);
        let padded_bytes: &[u8] = &padded.bytes(buffer);
        // Padding of 3 => three leading spaces.
        assert_eq!(padded_bytes, b"   Hello");
    }

    /// `Value` yields the same bytes/text whether it indexes into the
    /// buffer or owns its string.
    #[test]
    fn test_raw() {
        let buffer = "Hello, world!";

        let index = Value::from((0, 5));
        let index_bytes: &[u8] = index.bytes(buffer);
        assert_eq!(index_bytes, b"Hello");
        let index_text: &str = index.str(buffer);
        assert_eq!(index_text, "Hello");

        let raw_string = Value::from("Hello");
        let raw_bytes: &[u8] = raw_string.bytes(buffer);
        assert_eq!(raw_bytes, b"Hello");

        let owned = String::from("Hello");
        let from_borrowed = Value::from(owned.as_str());
        assert_eq!(from_borrowed.str(buffer), "Hello");
    }

    /// Walks a `BasicReader` across two lines and past the end.
    #[test]
    fn test_bytes_reader() {
        let buffer = "Hello, world!\nThis is a test.\n";
        let mut reader = BasicReader::new(buffer);
        assert_eq!(reader.peek_byte(), b'H');

        {
            let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
            assert_eq!(line.as_ref(), b"Hello, world!\n");
            assert_eq!(segment.start(), 0);
            assert_eq!(segment.stop(), 14);
        }

        reader.advance(7);
        assert_eq!(reader.peek_byte(), b'w');

        reader.advance_line();
        assert_eq!(reader.peek_byte(), b'T');

        {
            let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
            assert_eq!(line.as_ref(), b"This is a test.\n");
            assert_eq!(segment.start(), 14);
            assert_eq!(segment.stop(), 30);
        }

        // Advancing far beyond the end leaves the reader at end of source.
        reader.advance(100);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// Every operation on a `BasicReader` over an empty buffer is a no-op.
    #[test]
    fn test_bytes_reader_empty() {
        let buffer = "";
        let mut reader = BasicReader::new(buffer);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance(10);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance_line();
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// Walks a `BlockReader` across a plain line and a padded line.
    #[test]
    fn test_block_reader() {
        let buffer = "Hello, world!\nThis is a test.\n";
        let lines = [Segment::new(0, 14), Segment::new_with_padding(14, 30, 2)];
        let mut reader = BlockReader::new(buffer, &lines);
        assert_eq!(reader.peek_byte(), b'H');

        {
            let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
            assert_eq!(line.as_ref(), b"Hello, world!\n");
            assert_eq!(segment.start(), 0);
            assert_eq!(segment.stop(), 14);
        }

        reader.advance(13);
        assert_eq!(reader.peek_byte(), b'\n');

        // Stepping over the newline lands on the padding of the second line.
        reader.advance(1);
        assert_eq!(reader.peek_byte(), SPACE[0]);

        {
            let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
            // Padding of 2 => two leading spaces.
            assert_eq!(line.as_ref(), b"  This is a test.\n");
            assert_eq!(segment.start(), 14);
            assert_eq!(segment.stop(), 30);
            assert_eq!(segment.padding(), 2);
        }

        // Two padding units plus the 'T'.
        reader.advance(3);
        assert_eq!(reader.peek_byte(), b'h');

        // Advancing far beyond the end leaves the reader at end of source.
        reader.advance(100);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// Every operation on a `BlockReader` without source or lines is a no-op.
    #[test]
    fn test_block_reader_empty() {
        let buffer = "";
        let lines: [Segment; 0] = [];
        let mut reader = BlockReader::new(buffer, &lines);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance(10);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance_line();
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }
}
1698
1699