Skip to main content

rushdown/
text.rs

1//! Text related structures and traits.
2
3extern crate alloc;
4
5use core::ops::Range;
6
7use memchr::memchr;
8
9use crate::util::{self, is_blank, is_space, trim_left_space, utf8_len, TinyVec};
10use alloc::borrow::Cow;
11use alloc::string::String;
12use alloc::vec::Vec;
13
14#[allow(unused_imports)]
15#[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
16use crate::println;
17
/// A one-byte slice holding an ASCII space; its first byte is what padding is rendered with.
const SPACE: &[u8] = &[b' '];
19
20// Value {{{
21
22//   Value {{{
23
24/// An enum represents a string value that can be either an [`Index`] or a [`String`].
25/// [`Value`] does not handle padding.
26/// Value is used for representing values that can be represented by a single line, such as
27/// link destinations.
28#[derive(Debug, Clone)]
29#[non_exhaustive]
30pub enum Value {
31    /// An Index variant holds a reference to indicies in the source.
32    Index(Index),
33
34    /// A String variant holds a string value.
35    String(String),
36}
37
38impl Value {
39    /// Returns byte slice value.
40    pub fn bytes<'a>(&'a self, source: &'a str) -> &'a [u8] {
41        match self {
42            Value::Index(index) => index.bytes(source),
43            Value::String(s) => s.as_bytes(),
44        }
45    }
46
47    /// Returns str value.
48    pub fn str<'a>(&'a self, source: &'a str) -> &'a str {
49        match self {
50            Value::Index(index) => index.str(source),
51            Value::String(s) => s.as_str(),
52        }
53    }
54
55    /// Returns true if the value is empty, otherwise false.
56    pub fn is_empty(&self) -> bool {
57        match self {
58            Value::Index(index) => index.is_empty(),
59            Value::String(s) => s.is_empty(),
60        }
61    }
62
63    /// Returns the length of the value.
64    pub fn len(&self) -> usize {
65        match self {
66            Value::Index(index) => index.len(),
67            Value::String(s) => s.len(),
68        }
69    }
70}
71
72impl From<&str> for Value {
73    fn from(s: &str) -> Self {
74        Value::String(String::from(s))
75    }
76}
77
78impl From<String> for Value {
79    fn from(s: String) -> Self {
80        Value::String(s)
81    }
82}
83
84impl From<&[u8]> for Value {
85    fn from(s: &[u8]) -> Self {
86        Value::String(String::from_utf8_lossy(s).into_owned())
87    }
88}
89
90impl From<Vec<u8>> for Value {
91    fn from(s: Vec<u8>) -> Self {
92        Value::String(String::from_utf8_lossy(&s).into_owned())
93    }
94}
95
96impl From<&[char]> for Value {
97    fn from(s: &[char]) -> Self {
98        Value::String(s.iter().collect())
99    }
100}
101
102impl From<Cow<'_, [u8]>> for Value {
103    fn from(s: Cow<'_, [u8]>) -> Self {
104        Value::String(String::from_utf8_lossy(&s).into_owned())
105    }
106}
107
108impl From<Cow<'_, str>> for Value {
109    fn from(s: Cow<'_, str>) -> Self {
110        Value::String(s.into_owned())
111    }
112}
113
114impl From<&Value> for Value {
115    fn from(v: &Value) -> Self {
116        match v {
117            Value::Index(index) => Value::Index(*index),
118            Value::String(s) => Value::String(s.clone()),
119        }
120    }
121}
122
123impl From<(usize, usize)> for Value {
124    fn from((start, stop): (usize, usize)) -> Self {
125        Value::Index(Index::new(start, stop))
126    }
127}
128
129impl From<Segment> for Value {
130    fn from(segment: Segment) -> Self {
131        Value::Index(Index::new(segment.start(), segment.stop()))
132    }
133}
134//   }}} Value
135
136//   Index {{{
137
/// An Index struct holds a pair of source positions, `start` and `stop`.
///
/// The accessors below slice the source with `start..stop`, so the pair is treated as a
/// half-open byte range.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Index {
    start: usize,

    stop: usize,
}

impl Index {
    /// Creates a new Index with start and stop.
    pub fn new(start: usize, stop: usize) -> Self {
        Index { start, stop }
    }

    /// The start position of the index.
    #[inline(always)]
    pub fn start(&self) -> usize {
        self.start
    }

    /// The stop position of the index.
    #[inline(always)]
    pub fn stop(&self) -> usize {
        self.stop
    }

    /// Returns the bytes of the index from the source.
    ///
    /// # Panics
    /// Panics if `start..stop` is not a valid range within `source`.
    #[inline(always)]
    pub fn bytes<'a>(&self, source: &'a str) -> &'a [u8] {
        &source.as_bytes()[self.start..self.stop]
    }

    /// Returns the str of the index from the source.
    ///
    /// # Safety
    /// This method does not check the validity of UTF-8 boundaries, nor that the range lies
    /// inside `source`. The index must have been built from positions of this same source.
    #[inline(always)]
    pub fn str<'a>(&self, source: &'a str) -> &'a str {
        // SAFETY: not verified here. Soundness relies on `start..stop` being an in-bounds
        // range of `source` whose ends fall on UTF-8 character boundaries.
        unsafe { source.get_unchecked(self.start..self.stop) }
    }

    /// Returns true if the index is empty, otherwise false.
    ///
    /// An index whose `start` is past its `stop` also counts as empty.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.start >= self.stop
    }

    /// Returns a new Index with same value except `start`.
    #[inline(always)]
    pub fn with_start(&self, v: usize) -> Index {
        Index::new(v, self.stop)
    }

    /// Returns a new Index with same value except `stop`.
    #[inline(always)]
    pub fn with_stop(&self, v: usize) -> Index {
        Index::new(self.start, v)
    }

    /// Returns the length of the index.
    ///
    /// The subtraction saturates, so an index that [`Index::is_empty`] reports as empty
    /// (including `start > stop`) has length 0 instead of underflowing.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.stop.saturating_sub(self.start)
    }
}
203
204impl From<Index> for Value {
205    fn from(index: Index) -> Self {
206        Value::Index(index)
207    }
208}
209
210impl From<(usize, usize)> for Index {
211    fn from((start, stop): (usize, usize)) -> Self {
212        Index::new(start, stop)
213    }
214}
215
216impl From<Segment> for Index {
217    fn from(segment: Segment) -> Self {
218        Index::new(segment.start(), segment.stop())
219    }
220}
221
222//   }}} Index
223
224//   MultilineValue {{{
225
226/// An enum represents a collection of values that can be either a collection of [`Index`] or
227/// a single [`String`].
228/// MultilineValue is used for representing values that can be represented by multiple lines, such as link
229/// titles.
230#[derive(Debug, Clone, Default)]
231#[non_exhaustive]
232pub enum MultilineValue {
233    #[default]
234    Empty,
235    Indices(TinyVec<Index>),
236    String(String),
237}
238
239impl MultilineValue {
240    /// Creates a [`MultilineValue`] from a single index.
241    pub fn from_index(index: Index) -> Self {
242        MultilineValue::Indices(TinyVec::from_single(index))
243    }
244
245    /// Creates a [`MultilineValue`] from a collection of indices.
246    pub fn from_indices(indices: Vec<Index>) -> Self {
247        MultilineValue::Indices(TinyVec::from_vec(indices))
248    }
249
250    /// Creates a [`MultilineValue`] from a string.
251    pub fn from_string(s: String) -> Self {
252        MultilineValue::String(s)
253    }
254
255    /// Returns a str value by concatenating all indices in the collection.
256    /// If this value has multiple indices, it will trim leading spaces of each index except the
257    /// first one.
258    pub fn str<'a>(&'a self, source: &'a str) -> Cow<'a, str> {
259        match self {
260            MultilineValue::Empty => Cow::Borrowed(""),
261            MultilineValue::Indices(indices) => {
262                let first = indices.get(0);
263                let second = indices.get(1);
264                if let Some(f) = first {
265                    if second.is_none() {
266                        return Cow::Borrowed(f.str(source));
267                    }
268                } else {
269                    return Cow::Borrowed("");
270                }
271                let mut result = String::new();
272                result.push_str(first.unwrap().str(source));
273                let b = second.unwrap().bytes(source);
274                result.push_str(unsafe { core::str::from_utf8_unchecked(trim_left_space(b)) });
275                for v in indices.iter().skip(2) {
276                    let b = v.bytes(source);
277                    result.push_str(unsafe { core::str::from_utf8_unchecked(trim_left_space(b)) });
278                }
279                Cow::Owned(result)
280            }
281            MultilineValue::String(s) => Cow::Borrowed(s.as_str()),
282        }
283    }
284
285    /// Returns a bytes value by concatenating all indices in the collection.
286    /// If this value has multiple indices, it will trim leading spaces of each index except the
287    /// first one.
288    pub fn bytes<'a>(&'a self, source: &'a str) -> Cow<'a, [u8]> {
289        match self {
290            MultilineValue::Empty => Cow::Borrowed(&[]),
291            MultilineValue::Indices(indices) => {
292                let first = indices.get(0);
293                let second = indices.get(1);
294                if let Some(f) = first {
295                    if second.is_none() {
296                        return Cow::Borrowed(f.bytes(source));
297                    }
298                } else {
299                    return Cow::Borrowed(&[]);
300                }
301                let mut result = Vec::new();
302                result.extend_from_slice(first.unwrap().bytes(source));
303                result.extend_from_slice(trim_left_space(second.unwrap().bytes(source)));
304                for v in indices.iter().skip(2) {
305                    result.extend_from_slice(trim_left_space(v.bytes(source)));
306                }
307                Cow::Owned(result)
308            }
309            MultilineValue::String(s) => Cow::Borrowed(s.as_bytes()),
310        }
311    }
312}
313
314impl From<String> for MultilineValue {
315    fn from(s: String) -> Self {
316        MultilineValue::String(s)
317    }
318}
319
320impl From<&String> for MultilineValue {
321    fn from(s: &String) -> Self {
322        MultilineValue::String(s.clone())
323    }
324}
325
326impl From<&str> for MultilineValue {
327    fn from(s: &str) -> Self {
328        MultilineValue::String(String::from(s))
329    }
330}
331
332impl From<&[u8]> for MultilineValue {
333    fn from(s: &[u8]) -> Self {
334        MultilineValue::String(String::from_utf8_lossy(s).into_owned())
335    }
336}
337
338impl From<Cow<'_, str>> for MultilineValue {
339    fn from(s: Cow<'_, str>) -> Self {
340        MultilineValue::String(s.into_owned())
341    }
342}
343
344impl From<Cow<'_, [u8]>> for MultilineValue {
345    fn from(s: Cow<'_, [u8]>) -> Self {
346        MultilineValue::String(String::from_utf8_lossy(&s).into_owned())
347    }
348}
349
350impl From<Value> for MultilineValue {
351    fn from(v: Value) -> Self {
352        match v {
353            Value::Index(index) => MultilineValue::Indices(TinyVec::from_single(index)),
354            Value::String(s) => MultilineValue::String(s),
355        }
356    }
357}
358
359impl From<TinyVec<Index>> for MultilineValue {
360    fn from(indices: TinyVec<Index>) -> Self {
361        MultilineValue::Indices(indices)
362    }
363}
364
//   }}} MultilineValue
366
367// }}} Value
368
369// Segment {{{
370
371//   Lines {{{
372
373/// An enum represents a collection of segments that can be either a collection of [`Segment`] or
374/// a single [`String`].
375/// Lines is used for representing lines of block elements that can be represented by multiple lines,
376/// such as HTML blocks.
377#[derive(Debug, Clone, Default)]
378#[non_exhaustive]
379pub enum Lines {
380    #[default]
381    Empty,
382    Segments(Vec<Segment>),
383    String(String),
384}
385
386impl Lines {
387    /// Creates a [`Lines`] from a collection of segments.
388    pub fn from_segments(segments: Vec<Segment>) -> Self {
389        Lines::Segments(segments)
390    }
391
392    /// Creates a [`Lines`] from a string.
393    pub fn from_string(s: String) -> Self {
394        Lines::String(s)
395    }
396
397    /// Returns an iterator that iterates over the lines of this [`Lines`] as str.
398    pub fn iter<'a>(&'a self, source: &'a str) -> impl Iterator<Item = Cow<'a, str>> {
399        LinesIter::new(
400            match self {
401                Lines::Empty => LinesIterState::Empty,
402                Lines::Segments(segments) => LinesIterState::Segments(segments.iter()),
403                Lines::String(s) => LinesIterState::String(s.split_inclusive('\n')),
404            },
405            source,
406        )
407    }
408}
409
410impl From<String> for Lines {
411    fn from(s: String) -> Self {
412        Lines::String(s)
413    }
414}
415
416impl From<&String> for Lines {
417    fn from(s: &String) -> Self {
418        Lines::String(s.clone())
419    }
420}
421
422impl From<&str> for Lines {
423    fn from(s: &str) -> Self {
424        Lines::String(String::from(s))
425    }
426}
427
428impl From<&[u8]> for Lines {
429    fn from(s: &[u8]) -> Self {
430        Lines::String(String::from_utf8_lossy(s).into_owned())
431    }
432}
433
434impl From<Vec<Segment>> for Lines {
435    fn from(segments: Vec<Segment>) -> Self {
436        Lines::Segments(segments)
437    }
438}
439
440impl From<&[Segment]> for Lines {
441    fn from(segments: &[Segment]) -> Self {
442        Lines::Segments(segments.to_vec())
443    }
444}
445
446enum LinesIterState<'a> {
447    Empty,
448    Segments(core::slice::Iter<'a, Segment>),
449    String(core::str::SplitInclusive<'a, char>),
450}
451
452/// Iterator that iterates over [`Lines`] as str
453struct LinesIter<'a> {
454    state: LinesIterState<'a>,
455    source: &'a str,
456}
457
458impl<'a> LinesIter<'a> {
459    /// Creates a new LinesIter with the given lines and source.
460    pub fn new(state: LinesIterState<'a>, source: &'a str) -> Self {
461        LinesIter { state, source }
462    }
463}
464
465impl<'a> Iterator for LinesIter<'a> {
466    type Item = Cow<'a, str>;
467
468    #[inline(always)]
469    fn next(&mut self) -> Option<Self::Item> {
470        match &mut self.state {
471            LinesIterState::Empty => None,
472            LinesIterState::Segments(iter) => iter.next().map(|segment| segment.str(self.source)),
473            LinesIterState::String(iter) => iter.next().map(Cow::Borrowed),
474        }
475    }
476}
477
478//   }}} Lines
479
480//   Block {{{
481
482/// Special collection of segments.
483/// Each segment represents a one line.
484/// Each segment does not contain multiple lines.
485pub type Block = [Segment];
486
487fn binary_search_block_pos(block: &Block, pos: usize) -> Option<usize> {
488    let mut left = 0;
489    let mut right = block.len();
490    while left < right {
491        let mid = (left + right) / 2;
492        if block[mid].start() <= pos && pos < block[mid].stop() {
493            return Some(mid);
494        }
495        if pos < block[mid].start() {
496            right = mid;
497        } else {
498            left = mid + 1;
499        }
500    }
501    None
502}
503
504/// Extension trait for [`Block`].
505pub trait BlockExt {
506    /// Returns a collection of values by converting each segment in the block to a value.
507    fn to_values(&self) -> MultilineValue;
508}
509
510impl BlockExt for Block {
511    fn to_values(&self) -> MultilineValue {
512        let first = self.first();
513        let second = self.get(1);
514        if let Some(f) = first {
515            if second.is_none() {
516                return MultilineValue::from_index((f.start(), f.stop()).into());
517            }
518        } else {
519            return MultilineValue::default();
520        }
521        let mut result = Vec::with_capacity(self.len());
522        for v in self.iter() {
523            result.push((v.start(), v.stop()).into());
524        }
525        MultilineValue::from_indices(result)
526    }
527}
528
529pub(crate) fn block_to_values(i: impl IntoIterator<Item = Segment>) -> MultilineValue {
530    let mut b = i.into_iter();
531    let first = b.next();
532    let second = b.next();
533    if let Some(f) = first {
534        if second.is_none() {
535            return MultilineValue::from_index(f.into());
536        }
537    } else {
538        return MultilineValue::default();
539    }
540    let mut result = Vec::with_capacity(2 + b.size_hint().0);
541    result.push(first.unwrap().into());
542    result.push(second.unwrap().into());
543    for segment in b {
544        result.push(segment.into());
545    }
546    MultilineValue::from_indices(result)
547}
548
549//   }}} Block
550
551//   Segment {{{
552
/// A Segment struct represents a segment of CommonMark text.
///
/// Besides the start and stop positions that an [`Index`] also has, a Segment carries a
/// padding count and a force_newline flag.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Segment {
    /// Start position in the source.
    start: usize,

    /// Stop position in the source.
    stop: usize,

    /// Number of spaces rendered in front of the source text.
    padding: u8,

    /// Whether the rendered text gets a trailing newline when it lacks one.
    force_newline: bool,
}
565
566impl Segment {
567    /// Creates a [`Segment`] with start and stop.
568    pub fn new(start: usize, stop: usize) -> Self {
569        Segment {
570            start,
571            stop,
572            padding: 0,
573            force_newline: false,
574        }
575    }
576
577    /// Create a Segment with start, stop, and padding.
578    pub fn new_with_padding(start: usize, stop: usize, padding: usize) -> Self {
579        Segment {
580            start,
581            stop,
582            padding: padding as u8,
583            force_newline: false,
584        }
585    }
586
587    /// A Start position of the segment.
588    #[inline(always)]
589    pub fn start(&self) -> usize {
590        self.start
591    }
592
593    /// A Stop position of the segment.
594    #[inline(always)]
595    pub fn stop(&self) -> usize {
596        self.stop
597    }
598
599    /// A Padding length of the segment.
600    /// In CommonMark, Tab width is varied corresponding to horizontal position.
601    /// So, padding is used to represent the number of leading spaces that should be inserted
602    /// to align the text.
603    #[inline(always)]
604    pub fn padding(&self) -> usize {
605        self.padding as usize
606    }
607
608    /// A Force newline flag of the segment.
609    #[inline(always)]
610    pub fn force_newline(&self) -> bool {
611        self.force_newline
612    }
613
614    /// Returns the bytes of the segment from the source.
615    pub fn bytes<'a>(&self, source: &'a str) -> Cow<'a, [u8]> {
616        if self.padding == 0
617            && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
618        {
619            Cow::Borrowed(&source.as_bytes()[self.start..self.stop])
620        } else {
621            let mut result = Vec::with_capacity(self.padding() + self.stop - self.start + 1);
622            result.extend(core::iter::repeat_n(SPACE[0], self.padding()));
623            result.extend_from_slice(&source.as_bytes()[self.start..self.stop]);
624            if self.force_newline && !result.is_empty() && *result.last().unwrap() != b'\n' {
625                result.push(b'\n');
626            }
627            Cow::Owned(result)
628        }
629    }
630
631    /// Returns the str of the segment from the source as a string.
632    ///
633    /// # Safety
634    /// This method does not check the validity of UTF-8 boundaries.
635    pub fn str<'a>(&self, source: &'a str) -> Cow<'a, str> {
636        if self.padding == 0
637            && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
638        {
639            unsafe { Cow::Borrowed(source.get_unchecked(self.start..self.stop)) }
640        } else {
641            let mut result = String::with_capacity(self.padding() + self.stop - self.start + 1);
642            result.extend(core::iter::repeat_n(' ', self.padding()));
643            unsafe { result.push_str(source.get_unchecked(self.start..self.stop)) };
644            if self.force_newline && !result.is_empty() && result.as_bytes().last() != Some(&b'\n')
645            {
646                result.push('\n');
647            }
648            Cow::Owned(result)
649        }
650    }
651
652    /// Returns the length of the segment.
653    #[inline(always)]
654    pub fn len(&self) -> usize {
655        self.stop - self.start + self.padding()
656    }
657
658    /// Returns a segment between this segment and the given segment.
659    pub fn between(&self, other: Segment) -> Segment {
660        if self.stop != other.stop {
661            panic!("invalid state");
662        }
663        Segment::new_with_padding(
664            self.start,
665            other.start,
666            (self.padding - other.padding) as usize,
667        )
668    }
669
670    /// Returns true if this segment is empty, otherwise false.
671    #[inline(always)]
672    pub fn is_empty(&self) -> bool {
673        self.start >= self.stop && self.padding == 0
674    }
675
676    /// Returns true if this segment is blank (only space characters), otherwise false.
677    pub fn is_blank(&self, source: &str) -> bool {
678        let v = &source.as_bytes()[self.start..self.stop];
679        is_blank(v)
680    }
681
682    /// Returns a new segment by slicing off all trailing space characters.
683    pub fn trim_right_space(&self, source: &str) -> Segment {
684        let v = &source.as_bytes()[self.start..self.stop];
685        let l = util::trim_right_space_length(v);
686        if l == v.len() {
687            Segment::new(self.start, self.start)
688        } else {
689            Segment::new_with_padding(self.start, self.stop - l, self.padding as usize)
690        }
691    }
692
693    /// Returns a new segment by slicing off all leading space characters including padding.
694    pub fn trim_left_space(&self, source: &str) -> Segment {
695        let v = &source.as_bytes()[self.start..self.stop];
696        let l = util::trim_left_space_length(v);
697        Segment::new(self.start + l, self.stop)
698    }
699
700    /// Returns a new segment by slicing off leading space
701    /// characters until the given width.
702    pub fn trim_left_space_width(&self, mut width: isize, source: &str) -> Segment {
703        let mut padding = self.padding as isize;
704        while width > 0 && padding > 0 {
705            width -= 1;
706            padding -= 1;
707        }
708        if width == 0 {
709            return Segment::new_with_padding(self.start, self.stop, padding as usize);
710        }
711        let v = &source.as_bytes()[self.start..self.stop];
712        let mut start = self.start;
713        for &c in v {
714            if start >= self.stop - 1 || width == 0 {
715                break;
716            }
717            if c == b' ' {
718                width -= 1;
719            } else if c == b'\t' {
720                width -= 4;
721            } else {
722                break;
723            }
724            start += 1;
725        }
726        if width < 0 {
727            padding = -width;
728        }
729        Segment::new_with_padding(start, self.stop, padding as usize)
730    }
731
732    /// Returns a new Segment with same value except `start`.
733    #[inline(always)]
734    pub fn with_start(&self, v: usize) -> Segment {
735        Segment::new_with_padding(v, self.stop, self.padding as usize)
736    }
737
738    /// Returns a new Segment with same value except `stop`.
739    #[inline(always)]
740    pub fn with_stop(&self, v: usize) -> Segment {
741        Segment::new_with_padding(self.start, v, self.padding as usize)
742    }
743
744    /// Returns a new Segment with padding set to given value.
745    #[inline(always)]
746    pub fn with_padding(&self, v: usize) -> Segment {
747        Segment::new_with_padding(self.start, self.stop, v)
748    }
749
750    /// Returns a new Segment with force_newline set to `v`.
751    #[inline(always)]
752    pub fn with_force_newline(&self, v: bool) -> Segment {
753        Segment {
754            start: self.start,
755            stop: self.stop,
756            padding: self.padding,
757            force_newline: v,
758        }
759    }
760
761    /// Returns an Index with same start and stop as this segment.
762    #[inline(always)]
763    pub fn to_index(&self) -> Index {
764        Index::new(self.start, self.stop)
765    }
766}
767
768impl From<(usize, usize)> for Segment {
769    fn from((start, stop): (usize, usize)) -> Self {
770        Segment::new(start, stop)
771    }
772}
773
774impl From<(usize, usize, usize)> for Segment {
775    fn from((start, stop, padding): (usize, usize, usize)) -> Self {
776        Segment::new_with_padding(start, stop, padding)
777    }
778}
779
780impl From<Index> for Segment {
781    fn from(index: Index) -> Self {
782        Segment::new(index.start(), index.stop())
783    }
784}
785
786impl From<Segment> for Range<usize> {
787    fn from(segment: Segment) -> Self {
788        segment.start()..segment.stop()
789    }
790}
791
792//   }}} Segment
793
794// }}} Segment
795
796// Reader {{{
797
798/// Indicates the end of string.
799pub const EOS: u8 = 0xff;
800
801/// A Reader trait represents a reader that can read and peek bytes.
802pub trait Reader<'a> {
803    /// Returns the source str.
804    fn source(&self) -> &'a str;
805
806    /// Returns current line number and position.
807    fn position(&self) -> (usize, Segment);
808
809    /// Resets the internal pointer to the beginning of the source.
810    fn reset_position(&mut self);
811
812    /// Sets current line number and position.
813    fn set_position(&mut self, line: usize, pos: Segment);
814
815    /// Sets padding to the reader.
816    fn set_padding(&mut self, padding: usize);
817
818    /// Reads the next byte without advancing the position.
819    /// Returns [`EOS`] if the end of the source is reached.
820    fn peek_byte(&self) -> u8;
821
822    /// Reads the next line segment without advancing the position.
823    /// Returns None if the end of the source is reached.
824    fn peek_line_segment(&self) -> Option<Segment>;
825
826    /// Reads the next line without advancing the position.
827    /// Returns None if the end of the source is reached.
828    fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)>;
829
830    /// Reads the next line without advancing the position.
831    /// Returns None if the end of the source is reached.
832    fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)>;
833
834    /// Advances the internal pointer.
835    fn advance(&mut self, n: usize);
836
837    /// Advances the internal pointer and add padding to the
838    /// reader.
839    fn advance_and_set_padding(&mut self, n: usize, padding: usize);
840
841    /// Advances the internal pointer to the next line head.
842    fn advance_line(&mut self);
843
844    /// Advances the internal pointer to the end of line.
845    /// If the line ends with a newline, it will be included in the segment.
846    /// If the line ends with EOF, it will not be included in the segment.
847    fn advance_to_eol(&mut self);
848
849    /// Returns a distance from the line head to current position.
850    fn line_offset(&mut self) -> usize;
851
852    /// Returns a character just before current internal pointer.
853    fn precending_charater(&self) -> char;
854
855    /// Skips blank lines and advances the internal pointer to the next non-blank line.
856    /// Returns None if the end of the source is reached.
857    fn skip_blank_lines(&mut self) -> Option<(Cow<'a, [u8]>, Segment)> {
858        loop {
859            match self.peek_line_bytes() {
860                None => return None,
861                Some((line, seg)) => {
862                    if is_blank(&line) {
863                        self.advance_line();
864                        continue;
865                    }
866                    return Some((line, seg));
867                }
868            }
869        }
870    }
871
872    /// Skips bytes while the given function returns true.
873    fn skip_while<F>(&mut self, mut f: F) -> usize
874    where
875        F: FnMut(u8) -> bool,
876    {
877        let mut i = 0usize;
878        loop {
879            let b = self.peek_byte();
880            if b == EOS {
881                break;
882            }
883            if f(b) {
884                i += 1;
885                self.advance(1);
886                continue;
887            }
888            break;
889        }
890        i
891    }
892
893    /// Skips space characters.
894    fn skip_spaces(&mut self) -> usize {
895        self.skip_while(is_space)
896    }
897}
898
899//   BasicReader {{{
900
901/// [`Reader`] implementation for byte slices.
902pub struct BasicReader<'a> {
903    source: &'a str,
904    bsource: &'a [u8],
905    source_length: usize,
906    line: Option<usize>,
907    pos: Segment,
908    head: usize,
909    line_offset: Option<usize>,
910}
911
912impl<'a> BasicReader<'a> {
913    /// Creates a new BasicReader with the given source.
914    pub fn new(source: &'a str) -> Self {
915        let bsource: &[u8] = source.as_bytes();
916        let source_length = bsource.len();
917        let mut b = BasicReader {
918            source,
919            bsource,
920            source_length,
921            line: None,
922            pos: Segment::new(0, 0),
923            head: 0,
924            line_offset: None,
925        };
926        b.reset_position();
927        b
928    }
929
930    /// Creates a new BasicReader with the given byte slice without UTF-8 validation.
931    ///
932    /// # Safety
933    /// - The caller must ensure that the given byte slice is valid UTF-8.
934    pub unsafe fn new_unchecked(source: &'a [u8]) -> Self {
935        Self::new(core::str::from_utf8_unchecked(source))
936    }
937}
938
939impl<'a> Reader<'a> for BasicReader<'a> {
940    fn source(&self) -> &'a str {
941        self.source
942    }
943
944    fn position(&self) -> (usize, Segment) {
945        (self.line.unwrap_or(0), self.pos)
946    }
947
948    fn reset_position(&mut self) {
949        self.line = None;
950        self.head = 0;
951        self.line_offset = None;
952        self.advance_line();
953    }
954
955    fn set_position(&mut self, line: usize, pos: Segment) {
956        self.line = Some(line);
957        self.pos = pos;
958        self.head = pos.start;
959        self.line_offset = None;
960    }
961
962    fn set_padding(&mut self, padding: usize) {
963        self.pos.padding = padding as u8;
964    }
965
966    fn peek_byte(&self) -> u8 {
967        if self.source_length == 0 {
968            return EOS;
969        }
970        if self.pos.padding() != 0 {
971            return SPACE[0];
972        }
973        if self.pos.start() < self.source_length {
974            return self.bsource[self.pos.start()];
975        }
976        EOS
977    }
978
979    fn peek_line_segment(&self) -> Option<Segment> {
980        if self.source_length == 0 {
981            return None;
982        }
983        if self.pos.start() < self.source_length {
984            return Some(self.pos);
985        }
986        None
987    }
988
989    fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
990        if self.source_length == 0 {
991            return None;
992        }
993        if self.pos.start() < self.source_length {
994            return Some((self.pos.bytes(self.source), self.pos));
995        }
996        None
997    }
998
999    fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
1000        if self.source_length == 0 {
1001            return None;
1002        }
1003        if self.pos.start() < self.source_length {
1004            return Some((self.pos.str(self.source), self.pos));
1005        }
1006        None
1007    }
1008
1009    fn advance(&mut self, n: usize) {
1010        if self.source_length == 0 {
1011            return;
1012        }
1013
1014        self.line_offset = None;
1015        if n < self.pos.len() && self.pos.padding() == 0 {
1016            self.pos.start += n;
1017            return;
1018        }
1019        let mut n = n;
1020        while n > 0 && self.pos.start < self.source_length {
1021            if self.pos.padding != 0 {
1022                self.pos.padding -= 1;
1023                n -= 1;
1024                continue;
1025            }
1026            if self.bsource[self.pos.start] == b'\n' {
1027                self.advance_line();
1028                n -= 1;
1029                continue;
1030            }
1031
1032            self.pos.start += 1;
1033            n -= 1;
1034        }
1035    }
1036
1037    fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
1038        self.advance(n);
1039        if padding > self.pos.padding() {
1040            self.set_padding(padding);
1041        }
1042    }
1043
1044    fn advance_line(&mut self) {
1045        self.line_offset = None;
1046        if self.source_length == 0 || self.pos.start >= self.source_length {
1047            return;
1048        }
1049
1050        if self.line.is_some() {
1051            self.pos.start = self.pos.stop;
1052            if self.pos.start >= self.source_length {
1053                return;
1054            }
1055            self.pos.stop = self.source_length;
1056            if self.bsource[self.pos.start] != b'\n' {
1057                if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
1058                    self.pos.stop = self.pos.start + i + 1;
1059                }
1060            } else {
1061                self.pos.stop = self.pos.start + 1;
1062            }
1063            self.line = Some(self.line.unwrap() + 1);
1064        } else {
1065            if let Some(i) = memchr(b'\n', self.bsource) {
1066                self.pos = (0, i + 1).into();
1067            } else {
1068                self.pos = (0, self.source_length).into();
1069            }
1070            self.line = Some(0);
1071        }
1072        self.head = self.pos.start;
1073        self.pos.padding = 0;
1074    }
1075
1076    fn advance_to_eol(&mut self) {
1077        if self.source_length == 0 || self.pos.start >= self.source_length {
1078            return;
1079        }
1080
1081        self.line_offset = None;
1082        if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
1083            self.pos.start += i;
1084        } else {
1085            self.pos.start = self.source_length;
1086        }
1087        self.pos.padding = 0;
1088    }
1089
1090    fn line_offset(&mut self) -> usize {
1091        if self.line_offset.is_none() {
1092            let mut v = 0;
1093            for i in self.head..self.pos.start {
1094                if self.bsource[i] == b'\t' {
1095                    v += util::tab_width(v);
1096                } else {
1097                    v += 1;
1098                }
1099            }
1100            v -= self.pos.padding();
1101            self.line_offset = Some(v);
1102        }
1103        self.line_offset.unwrap_or(0)
1104    }
1105
1106    fn precending_charater(&self) -> char {
1107        if self.pos.padding() != 0 {
1108            return ' ';
1109        }
1110        if self.pos.start() == 0 {
1111            return '\n';
1112        }
1113        let mut i = self.pos.start() - 1;
1114        loop {
1115            if let Some(l) = utf8_len(self.bsource[i]) {
1116                if l == 1 {
1117                    return self.bsource[i] as char;
1118                }
1119                return str::from_utf8(&self.bsource[i..i + l])
1120                    .ok()
1121                    .and_then(|s| s.chars().next())
1122                    .unwrap_or('\u{FFFD}');
1123            }
1124            i -= 1;
1125            if i == 0 {
1126                break;
1127            }
1128        }
1129        '\u{FFFD}'
1130    }
1131}
1132
1133//   }}} BasicReader
1134
1135//   BlockReader {{{
1136
/// [`Reader`] implementation for given blocks.
///
/// Reads the source line by line, where the lines are the segments of a [`Block`]
/// rather than the physical lines of the source.
pub struct BlockReader<'a> {
    // Full source text the block segments point into.
    source: &'a str,
    // Byte view of `source`.
    bsource: &'a [u8],
    // Segments (lines) this reader iterates over.
    block: &'a Block,
    // Index of the current line in `block`; `None` until the first line is entered.
    line: Option<usize>,
    // Current position: the unread remainder of the current line.
    pos: Segment,
    // Start offset of the current line's segment; base for `line_offset`.
    head: usize,
    // Stop offset of the last segment in `block` (0 when the block is empty).
    last: usize,
    // Cached result of `line_offset`; `None` when it must be recomputed.
    line_offset: Option<usize>,
}
1148
1149impl<'a> BlockReader<'a> {
1150    /// Creates a new BlockReader with the given source and block.
1151    pub fn new(source: &'a str, block: &'a Block) -> Self {
1152        let mut b = BlockReader {
1153            source,
1154            bsource: source.as_bytes(),
1155            block,
1156            line: None,
1157            pos: Segment::new(0, 0),
1158            head: 0,
1159            last: 0,
1160            line_offset: None,
1161        };
1162        b.reset(block);
1163        b
1164    }
1165
1166    /// Creates a new BlockReader with the given byte slice without UTF-8 validation.
1167    ///
1168    /// # Safety
1169    /// - The caller must ensure that the given byte slice is valid UTF-8.
1170    pub unsafe fn new_unchecked(source: &'a [u8], block: &'a Block) -> Self {
1171        Self::new(core::str::from_utf8_unchecked(source), block)
1172    }
1173
1174    /// Resets the reader with given new block.
1175    pub fn reset(&mut self, lines: &'a Block) {
1176        self.block = lines;
1177        self.reset_position();
1178    }
1179
1180    /// Returns Values that contains value between the current position and the given
1181    /// position.
1182    pub fn between_current(&mut self, line: usize, pos: Segment) -> MultilineValue {
1183        if line == self.line.unwrap_or(0) {
1184            let seg = self.block[line];
1185            if pos.start() >= seg.start() && self.pos.start() <= seg.stop() {
1186                return block_to_values(BetweenBlockIterator::single(
1187                    pos.start()..self.pos.start(),
1188                ));
1189            }
1190        }
1191        block_to_values(BetweenBlockIterator::multi(
1192            BlockReader {
1193                source: self.source,
1194                bsource: self.bsource,
1195                block: self.block,
1196                line: self.line,
1197                pos: self.pos,
1198                head: self.head,
1199                last: self.last,
1200                line_offset: self.line_offset,
1201            },
1202            line,
1203            pos,
1204        ))
1205    }
1206
1207    /// Returns Values that contains segments between the given range.
1208    pub fn between(&self, range: Range<usize>) -> MultilineValue {
1209        let from_line = binary_search_block_pos(self.block, range.start).unwrap_or(0);
1210        let mut from_pos = self.block[from_line];
1211        if range.start >= from_pos.start() && range.end <= from_pos.stop() {
1212            return block_to_values(BetweenBlockIterator::single(range));
1213        }
1214        let to_line =
1215            binary_search_block_pos(self.block, range.end).unwrap_or(self.block.len() - 1);
1216        let mut to_pos = self.block[to_line];
1217        to_pos.start = range.end;
1218        from_pos.start = range.start;
1219
1220        block_to_values(BetweenBlockIterator::multi(
1221            BlockReader {
1222                source: self.source,
1223                bsource: self.bsource,
1224                block: self.block,
1225                line: Some(to_line),
1226                pos: to_pos,
1227                head: 0,
1228                last: 0,
1229                line_offset: None,
1230            },
1231            from_line,
1232            from_pos,
1233        ))
1234    }
1235}
1236
// State for iterating over a span that covers more than one line of a block.
struct MultilineBetweenBlock<'a> {
    // Reader used to walk the lines; moved to the start position on construction.
    reader: BlockReader<'a>,
    // Line on which the span starts.
    start_line: usize,
    // Position on `start_line` where the span starts.
    start_pos: Segment,
    // Line the reader was on at construction time; the span ends on this line.
    current_line: usize,
    // Position the reader was at on construction; its start is the end of the span.
    current_pos: Segment,
}
1244
// Iterator yielding the segments of a span, either as one plain range (`single`)
// or line by line (`multi`).
struct BetweenBlockIterator<'a> {
    // Multi-line state; `None` for a single-range iterator.
    multi: Option<MultilineBetweenBlock<'a>>,
    // The only range to yield; `None` for a multi-line iterator.
    single: Option<Range<usize>>,
    // Set once the last segment has been yielded.
    done: bool,
}
1250
1251impl<'a> BetweenBlockIterator<'a> {
1252    fn multi(mut reader: BlockReader<'a>, line: usize, pos: Segment) -> BetweenBlockIterator<'a> {
1253        let (current_line, current_pos) = reader.position();
1254        reader.set_position(line, pos);
1255        BetweenBlockIterator {
1256            multi: Some(MultilineBetweenBlock {
1257                reader,
1258                start_line: line,
1259                start_pos: pos,
1260                current_line,
1261                current_pos,
1262            }),
1263            single: None,
1264            done: false,
1265        }
1266    }
1267
1268    fn single(range: Range<usize>) -> BetweenBlockIterator<'a> {
1269        BetweenBlockIterator {
1270            multi: None,
1271            single: Some(range),
1272            done: false,
1273        }
1274    }
1275}
1276
1277impl<'a> Iterator for BetweenBlockIterator<'a> {
1278    type Item = Segment;
1279
1280    fn next(&mut self) -> Option<Self::Item> {
1281        if self.done {
1282            return None;
1283        }
1284        if let Some(s) = &self.single {
1285            self.done = true;
1286            return Some((s.start, s.end).into());
1287        }
1288        if let Some(m) = &mut self.multi {
1289            let (ln, _) = m.reader.position();
1290            let (_, segment) = m.reader.peek_line_bytes()?;
1291            let start = if ln == m.start_line {
1292                m.start_pos.start()
1293            } else {
1294                segment.start()
1295            };
1296            let stop = if ln == m.current_line {
1297                m.current_pos.start()
1298            } else {
1299                segment.stop()
1300            };
1301            let seg = Segment::new(start, stop);
1302            if ln == m.current_line {
1303                m.reader.advance(stop - start);
1304                self.done = true;
1305            }
1306            m.reader.advance_line();
1307            return Some(seg);
1308        }
1309        None
1310    }
1311}
1312
1313impl<'a> Reader<'a> for BlockReader<'a> {
    /// Returns the full source text this reader was created with.
    fn source(&self) -> &'a str {
        self.source
    }
1317
1318    fn position(&self) -> (usize, Segment) {
1319        (self.line.unwrap_or(0), self.pos)
1320    }
1321
1322    fn reset_position(&mut self) {
1323        self.line = None;
1324        self.head = 0;
1325        self.last = 0;
1326        self.line_offset = None;
1327        self.pos.start = 0;
1328        self.pos.stop = 0;
1329        self.pos.padding = 0;
1330        self.pos.force_newline = false;
1331        if let Some(l) = self.block.last() {
1332            self.last = l.stop;
1333        }
1334        self.advance_line();
1335    }
1336
1337    fn set_position(&mut self, line: usize, pos: Segment) {
1338        self.line_offset = None;
1339        self.line = Some(line);
1340        self.pos = pos;
1341        if line < self.block.len() {
1342            self.head = self.block[line].start;
1343        }
1344    }
1345
1346    fn set_padding(&mut self, padding: usize) {
1347        self.line_offset = None;
1348        self.pos.padding = padding as u8;
1349    }
1350
1351    fn peek_byte(&self) -> u8 {
1352        if self.bsource.is_empty() || self.block.is_empty() {
1353            return EOS;
1354        }
1355        if self.pos.padding() != 0 {
1356            return SPACE[0];
1357        }
1358        let l = self.line.unwrap();
1359        if self.pos.is_empty() {
1360            if l < self.block.len() - 1 {
1361                let next = &self.block[l + 1];
1362                if next.padding() != 0 {
1363                    return SPACE[0];
1364                }
1365                if next.start < self.bsource.len() {
1366                    return self.bsource[next.start];
1367                }
1368            }
1369            return EOS;
1370        } else if self.pos.start < self.bsource.len() {
1371            return self.bsource[self.pos.start];
1372        }
1373        EOS
1374    }
1375
1376    fn peek_line_segment(&self) -> Option<Segment> {
1377        if self.bsource.is_empty() || self.block.is_empty() {
1378            return None;
1379        }
1380        let l = self.line.unwrap();
1381        if self.pos.is_empty() {
1382            if l < self.block.len() - 1 {
1383                let s = self.block[l + 1].start;
1384                if s < self.bsource.len() {
1385                    return Some(self.block[l + 1]);
1386                }
1387            }
1388            return None;
1389        } else if self.pos.start < self.bsource.len() {
1390            return Some(self.pos);
1391        }
1392        None
1393    }
1394
1395    fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
1396        if self.bsource.is_empty() || self.block.is_empty() {
1397            return None;
1398        }
1399        let l = self.line.unwrap();
1400        if self.pos.is_empty() {
1401            if l < self.block.len() - 1 {
1402                let s = self.block[l + 1].start;
1403                if s < self.bsource.len() {
1404                    return Some((self.block[l + 1].bytes(self.source), self.block[l + 1]));
1405                }
1406            }
1407            return None;
1408        } else if self.pos.start < self.bsource.len() {
1409            return Some((self.pos.bytes(self.source), self.pos));
1410        }
1411        None
1412    }
1413
1414    fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
1415        if self.bsource.is_empty() || self.block.is_empty() {
1416            return None;
1417        }
1418        let l = self.line.unwrap();
1419        if self.pos.is_empty() {
1420            if l < self.block.len() - 1 {
1421                let s = self.block[l + 1].start;
1422                if s < self.bsource.len() {
1423                    return Some((self.block[l + 1].str(self.source), self.block[l + 1]));
1424                }
1425            }
1426            return None;
1427        } else if self.pos.start < self.bsource.len() {
1428            return Some((self.pos.str(self.source), self.pos));
1429        }
1430        None
1431    }
1432
1433    fn advance(&mut self, n: usize) {
1434        if self.bsource.is_empty() || self.block.is_empty() {
1435            return;
1436        }
1437        self.line_offset = None;
1438        if n < self.pos.len() && self.pos.padding() == 0 {
1439            self.pos.start += n;
1440            return;
1441        }
1442        let mut n = n;
1443        while n > 0 && self.pos.start < self.last {
1444            if self.pos.padding != 0 {
1445                self.pos.padding -= 1;
1446                n -= 1;
1447                continue;
1448            }
1449            if self.pos.start >= self.pos.stop - 1 && self.pos.stop < self.last {
1450                self.advance_line();
1451                n -= 1;
1452                continue;
1453            }
1454
1455            self.pos.start += 1;
1456            n -= 1;
1457        }
1458    }
1459
1460    fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
1461        self.advance(n);
1462        if padding > self.pos.padding() {
1463            self.set_padding(padding);
1464        }
1465    }
1466
1467    fn advance_line(&mut self) {
1468        if self.bsource.is_empty() || self.block.is_empty() {
1469            return;
1470        }
1471        let l = match self.line {
1472            Some(l) => l + 1,
1473            None => 0,
1474        };
1475        if l < self.block.len() {
1476            self.set_position(l, self.block[l]);
1477        } else {
1478            self.pos.start = self.source().len();
1479            self.pos.stop = self.pos.start;
1480            self.pos.padding = 0;
1481        }
1482    }
1483
1484    fn advance_to_eol(&mut self) {
1485        if self.bsource.is_empty() || self.block.is_empty() {
1486            return;
1487        }
1488        self.line_offset = None;
1489        let c = self.bsource[self.pos.stop - 1];
1490        if c == b'\n' {
1491            self.pos.start = self.pos.stop - 1;
1492        } else {
1493            self.pos.start = self.pos.stop;
1494        }
1495    }
1496
1497    fn line_offset(&mut self) -> usize {
1498        if self.bsource.is_empty() || self.block.is_empty() {
1499            return 0;
1500        }
1501        if self.line_offset.is_none() {
1502            let mut v = 0;
1503            for i in self.head..self.pos.start {
1504                if self.bsource[i] == b'\t' {
1505                    v += util::tab_width(v);
1506                } else {
1507                    v += 1;
1508                }
1509            }
1510            v -= self.pos.padding();
1511            self.line_offset = Some(v);
1512        }
1513        self.line_offset.unwrap_or(0)
1514    }
1515
1516    fn precending_charater(&self) -> char {
1517        if self.pos.padding() != 0 {
1518            return ' ';
1519        }
1520        if self.pos.start() == 0 {
1521            return '\n';
1522        }
1523        if self.block.is_empty() {
1524            return '\n';
1525        }
1526        let first_line = &self.block[0];
1527        if self.line.unwrap_or(0) == 0 && self.pos.start() <= first_line.start() {
1528            return '\n';
1529        }
1530
1531        let mut i = self.pos.start() - 1;
1532        loop {
1533            if let Some(l) = utf8_len(self.bsource[i]) {
1534                if l == 1 {
1535                    return self.bsource[i] as char;
1536                }
1537                return str::from_utf8(&self.bsource[i..i + l])
1538                    .ok()
1539                    .and_then(|s| s.chars().next())
1540                    .unwrap_or('\u{FFFD}');
1541            }
1542            i -= 1;
1543            if i == 0 {
1544                break;
1545            }
1546        }
1547        if i == 0 {
1548            return '\n';
1549        }
1550        '\u{FFFD}'
1551    }
1552}
1553//   }}} BlockReader
1554
1555// }}} Reader
1556
1557// Tests {{{
1558
#[cfg(test)]
mod tests {
    use super::*;

    #[allow(unused_imports)]
    #[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
    use crate::println;

    /// A segment yields its slice of the source, prefixed by one space per padding unit.
    #[test]
    fn test_segment() {
        let buffer = "Hello, world!";
        let segment: Segment = (0, 5).into();
        let s: &[u8] = &segment.bytes(buffer);
        assert_eq!(s, b"Hello");

        let segment_with_padding = Segment::new_with_padding(0, 5, 3);
        let s: &[u8] = &segment_with_padding.bytes(buffer);
        assert_eq!(s, b"   Hello");
    }

    /// `Value` resolves to the same text whether built from an index pair or a string.
    #[test]
    fn test_raw() {
        let buffer = "Hello, world!";
        let index = Value::from((0, 5));
        let s: &[u8] = index.bytes(buffer);
        assert_eq!(s, b"Hello");

        let raw_string = Value::from("Hello");
        let s: &[u8] = raw_string.bytes(buffer);
        assert_eq!(s, b"Hello");

        let str: &str = index.str(buffer);
        assert_eq!(str, "Hello");

        let string = String::from("Hello");
        let v = Value::from(string.as_str());
        assert_eq!(v.str(buffer), "Hello");
    }

    /// `BasicReader` walks a two-line source: peeking, advancing within a line,
    /// advancing to the next line and running past the end.
    #[test]
    fn test_bytes_reader() {
        let buffer = "Hello, world!\nThis is a test.\n";
        let mut reader = BasicReader::new(buffer);
        assert_eq!(reader.peek_byte(), b'H');

        if let Some((line, segment)) = reader.peek_line_bytes() {
            assert_eq!(line.as_ref(), b"Hello, world!\n");
            assert_eq!(segment.start(), 0);
            assert_eq!(segment.stop(), 14);
        } else {
            panic!("Expected a line");
        }

        reader.advance(7);
        assert_eq!(reader.peek_byte(), b'w');

        reader.advance_line();
        assert_eq!(reader.peek_byte(), b'T');

        if let Some((line, segment)) = reader.peek_line_bytes() {
            assert_eq!(line.as_ref(), b"This is a test.\n");
            assert_eq!(segment.start(), 14);
            assert_eq!(segment.stop(), 30);
        } else {
            panic!("Expected a line");
        }

        reader.advance(100); // Advance beyond the end
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// `BasicReader` over an empty source always reports end of stream.
    #[test]
    fn test_bytes_reader_empty() {
        let buffer = "";
        let mut reader = BasicReader::new(buffer);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
        reader.advance(10);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
        reader.advance_line();
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// `BlockReader` walks two segments, the second one padded: padding is exposed as
    /// spaces and consumed before the bytes of the line.
    #[test]
    fn test_block_reader() {
        let buffer = "Hello, world!\nThis is a test.\n";
        let lines = [Segment::new(0, 14), Segment::new_with_padding(14, 30, 2)];
        let mut reader = BlockReader::new(buffer, &lines);
        assert_eq!(reader.peek_byte(), b'H');

        if let Some((line, segment)) = reader.peek_line_bytes() {
            assert_eq!(line.as_ref(), b"Hello, world!\n");
            assert_eq!(segment.start(), 0);
            assert_eq!(segment.stop(), 14);
        } else {
            panic!("Expected a line");
        }

        reader.advance(13);
        assert_eq!(reader.peek_byte(), b'\n');

        // Stepping over the newline enters the padded second line.
        reader.advance(1);
        assert_eq!(reader.peek_byte(), SPACE[0]);

        if let Some((line, segment)) = reader.peek_line_bytes() {
            assert_eq!(line.as_ref(), b"  This is a test.\n");
            assert_eq!(segment.start(), 14);
            assert_eq!(segment.stop(), 30);
            assert_eq!(segment.padding(), 2);
        } else {
            panic!("Expected a line");
        }

        // Two steps consume the padding, the third consumes 'T'.
        reader.advance(3);
        assert_eq!(reader.peek_byte(), b'h');

        reader.advance(100); // Advance beyond the end
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// `BlockReader` over an empty source and empty block always reports end of stream.
    #[test]
    fn test_block_reader_empty() {
        let buffer = "";
        let lines: [Segment; 0] = [];
        let mut reader = BlockReader::new(buffer, &lines);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
        reader.advance(10);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
        reader.advance_line();
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }
}
1698
1699// }}} Tests