Skip to main content

rushdown/
text.rs

1//! Text related structures and traits.
2
3extern crate alloc;
4
5use core::ops::Range;
6
7use memchr::memchr;
8
9use crate::util::{self, is_blank, is_space, trim_left_space, utf8_len, TinyVec};
10use alloc::borrow::Cow;
11use alloc::string::String;
12use alloc::vec::Vec;
13
14#[allow(unused_imports)]
15#[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
16use crate::println;
17
18const SPACE: &[u8] = b" ";
19
20// Value {{{
21
22//   Value {{{
23
24/// An enum represents a string value that can be either an [`Index`] or a [`String`].
25/// [`Value`] does not handle padding.
26/// Value is used for representing values that can be represented by a single line, such as
27/// link destinations.
28#[derive(Debug, Clone)]
29#[non_exhaustive]
30pub enum Value {
31    /// An Index variant holds a reference to indicies in the source.
32    Index(Index),
33
34    /// A String variant holds a string value.
35    String(String),
36}
37
38impl Value {
39    /// Returns byte slice value.
40    pub fn bytes<'a>(&'a self, source: &'a str) -> &'a [u8] {
41        match self {
42            Value::Index(index) => index.bytes(source),
43            Value::String(s) => s.as_bytes(),
44        }
45    }
46
47    /// Returns str value.
48    pub fn str<'a>(&'a self, source: &'a str) -> &'a str {
49        match self {
50            Value::Index(index) => index.str(source),
51            Value::String(s) => s.as_str(),
52        }
53    }
54
55    /// Returns true if the value is empty, otherwise false.
56    pub fn is_empty(&self) -> bool {
57        match self {
58            Value::Index(index) => index.is_empty(),
59            Value::String(s) => s.is_empty(),
60        }
61    }
62
63    /// Returns the length of the value.
64    pub fn len(&self) -> usize {
65        match self {
66            Value::Index(index) => index.len(),
67            Value::String(s) => s.len(),
68        }
69    }
70}
71
72impl From<&str> for Value {
73    fn from(s: &str) -> Self {
74        Value::String(String::from(s))
75    }
76}
77
78impl From<String> for Value {
79    fn from(s: String) -> Self {
80        Value::String(s)
81    }
82}
83
84impl From<&[u8]> for Value {
85    fn from(s: &[u8]) -> Self {
86        Value::String(String::from_utf8_lossy(s).into_owned())
87    }
88}
89
90impl From<Vec<u8>> for Value {
91    fn from(s: Vec<u8>) -> Self {
92        Value::String(String::from_utf8_lossy(&s).into_owned())
93    }
94}
95
96impl From<&[char]> for Value {
97    fn from(s: &[char]) -> Self {
98        Value::String(s.iter().collect())
99    }
100}
101
102impl From<Cow<'_, [u8]>> for Value {
103    fn from(s: Cow<'_, [u8]>) -> Self {
104        Value::String(String::from_utf8_lossy(&s).into_owned())
105    }
106}
107
108impl From<Cow<'_, str>> for Value {
109    fn from(s: Cow<'_, str>) -> Self {
110        Value::String(s.into_owned())
111    }
112}
113
114impl From<&Value> for Value {
115    fn from(v: &Value) -> Self {
116        match v {
117            Value::Index(index) => Value::Index(*index),
118            Value::String(s) => Value::String(s.clone()),
119        }
120    }
121}
122
123impl From<(usize, usize)> for Value {
124    fn from((start, stop): (usize, usize)) -> Self {
125        Value::Index(Index::new(start, stop))
126    }
127}
128
129impl From<Segment> for Value {
130    fn from(segment: Segment) -> Self {
131        Value::Index(Index::new(segment.start(), segment.stop()))
132    }
133}
134//   }}} Value
135
136//   Index {{{
137
/// An Index struct holds information about source positions.
///
/// It is a `start..stop` pair of byte offsets into a source string.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Index {
    /// Byte offset where the range starts.
    start: usize,

    /// Byte offset where the range stops (exclusive).
    stop: usize,
}

impl Index {
    /// Create a new Index with start and stop.
    pub fn new(start: usize, stop: usize) -> Self {
        Index { start, stop }
    }

    /// A Start position of the index.
    #[inline(always)]
    pub fn start(&self) -> usize {
        self.start
    }

    /// A Stop position of the index.
    #[inline(always)]
    pub fn stop(&self) -> usize {
        self.stop
    }

    /// Returns the bytes of the index from the source.
    ///
    /// # Panics
    /// Panics if `start..stop` is not a valid range within `source`.
    #[inline(always)]
    pub fn bytes<'a>(&self, source: &'a str) -> &'a [u8] {
        &source.as_bytes()[self.start..self.stop]
    }

    /// Returns the str of the index from the source.
    ///
    /// # Safety
    /// This method does not check the validity of UTF-8 boundaries, nor that the range lies
    /// within `source`. The index must have been produced for this `source`.
    #[inline(always)]
    pub fn str<'a>(&self, source: &'a str) -> &'a str {
        // SAFETY: not verified here — the caller is trusted to pass the source this index was
        // created for, so that `start..stop` is in bounds and on char boundaries.
        unsafe { source.get_unchecked(self.start..self.stop) }
    }

    /// Returns true if the index is empty, otherwise false.
    ///
    /// An index whose `start` is past its `stop` is also considered empty.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.start >= self.stop
    }

    /// Returns a new Index with same value except `start`.
    #[inline(always)]
    pub fn with_start(&self, v: usize) -> Index {
        Index::new(v, self.stop)
    }

    /// Returns a new Index with same value except `stop`.
    #[inline(always)]
    pub fn with_stop(&self, v: usize) -> Index {
        Index::new(self.start, v)
    }

    /// Returns the length of the index.
    ///
    /// Returns 0 when `start` is past `stop`, in agreement with [`Index::is_empty`].
    #[inline(always)]
    pub fn len(&self) -> usize {
        // Saturate instead of underflowing for an inverted range.
        self.stop.saturating_sub(self.start)
    }
}
203
204impl From<Index> for Value {
205    fn from(index: Index) -> Self {
206        Value::Index(index)
207    }
208}
209
210impl From<(usize, usize)> for Index {
211    fn from((start, stop): (usize, usize)) -> Self {
212        Index::new(start, stop)
213    }
214}
215
216impl From<Segment> for Index {
217    fn from(segment: Segment) -> Self {
218        Index::new(segment.start(), segment.stop())
219    }
220}
221
222//   }}} Index
223
224//   MultilineValue {{{
225
226/// An enum represents a collection of values that can be either a collection of [`Index`] or
227/// a single [`String`].
228/// MultilineValue is used for representing values that can be represented by multiple lines, such as link
229/// titles.
230#[derive(Debug, Clone, Default)]
231#[non_exhaustive]
232pub enum MultilineValue {
233    #[default]
234    Empty,
235    Indices(TinyVec<Index>),
236    String(String),
237}
238
239impl MultilineValue {
240    /// Creates a [`MultilineValue`] from a single index.
241    pub fn from_index(index: Index) -> Self {
242        MultilineValue::Indices(TinyVec::from_single(index))
243    }
244
245    /// Creates a [`MultilineValue`] from a collection of indices.
246    pub fn from_indices(indices: Vec<Index>) -> Self {
247        MultilineValue::Indices(TinyVec::from_vec(indices))
248    }
249
250    /// Creates a [`MultilineValue`] from a string.
251    pub fn from_string(s: String) -> Self {
252        MultilineValue::String(s)
253    }
254
255    /// Returns a str value by concatenating all indices in the collection.
256    /// If this value has multiple indices, it will trim leading spaces of each index except the
257    /// first one.
258    pub fn str<'a>(&'a self, source: &'a str) -> Cow<'a, str> {
259        match self {
260            MultilineValue::Empty => Cow::Borrowed(""),
261            MultilineValue::Indices(indices) => {
262                let first = indices.get(0);
263                let second = indices.get(1);
264                if let Some(f) = first {
265                    if second.is_none() {
266                        return Cow::Borrowed(f.str(source));
267                    }
268                } else {
269                    return Cow::Borrowed("");
270                }
271                let mut result = String::new();
272                result.push_str(first.unwrap().str(source));
273                let b = second.unwrap().bytes(source);
274                result.push_str(unsafe { core::str::from_utf8_unchecked(trim_left_space(b)) });
275                for v in indices.iter().skip(2) {
276                    let b = v.bytes(source);
277                    result.push_str(unsafe { core::str::from_utf8_unchecked(trim_left_space(b)) });
278                }
279                Cow::Owned(result)
280            }
281            MultilineValue::String(s) => Cow::Borrowed(s.as_str()),
282        }
283    }
284
285    /// Returns a bytes value by concatenating all indices in the collection.
286    /// If this value has multiple indices, it will trim leading spaces of each index except the
287    /// first one.
288    pub fn bytes<'a>(&'a self, source: &'a str) -> Cow<'a, [u8]> {
289        match self {
290            MultilineValue::Empty => Cow::Borrowed(&[]),
291            MultilineValue::Indices(indices) => {
292                let first = indices.get(0);
293                let second = indices.get(1);
294                if let Some(f) = first {
295                    if second.is_none() {
296                        return Cow::Borrowed(f.bytes(source));
297                    }
298                } else {
299                    return Cow::Borrowed(&[]);
300                }
301                let mut result = Vec::new();
302                result.extend_from_slice(first.unwrap().bytes(source));
303                result.extend_from_slice(trim_left_space(second.unwrap().bytes(source)));
304                for v in indices.iter().skip(2) {
305                    result.extend_from_slice(trim_left_space(v.bytes(source)));
306                }
307                Cow::Owned(result)
308            }
309            MultilineValue::String(s) => Cow::Borrowed(s.as_bytes()),
310        }
311    }
312}
313
314impl From<String> for MultilineValue {
315    fn from(s: String) -> Self {
316        MultilineValue::String(s)
317    }
318}
319
320impl From<&String> for MultilineValue {
321    fn from(s: &String) -> Self {
322        MultilineValue::String(s.clone())
323    }
324}
325
326impl From<&str> for MultilineValue {
327    fn from(s: &str) -> Self {
328        MultilineValue::String(String::from(s))
329    }
330}
331
332impl From<&[u8]> for MultilineValue {
333    fn from(s: &[u8]) -> Self {
334        MultilineValue::String(String::from_utf8_lossy(s).into_owned())
335    }
336}
337
338impl From<Vec<u8>> for MultilineValue {
339    fn from(s: Vec<u8>) -> Self {
340        MultilineValue::String(String::from_utf8_lossy(&s).into_owned())
341    }
342}
343
344impl From<Cow<'_, str>> for MultilineValue {
345    fn from(s: Cow<'_, str>) -> Self {
346        MultilineValue::String(s.into_owned())
347    }
348}
349
350impl From<Cow<'_, [u8]>> for MultilineValue {
351    fn from(s: Cow<'_, [u8]>) -> Self {
352        MultilineValue::String(String::from_utf8_lossy(&s).into_owned())
353    }
354}
355
356impl From<Value> for MultilineValue {
357    fn from(v: Value) -> Self {
358        match v {
359            Value::Index(index) => MultilineValue::Indices(TinyVec::from_single(index)),
360            Value::String(s) => MultilineValue::String(s),
361        }
362    }
363}
364
365impl From<Segment> for MultilineValue {
366    fn from(segment: Segment) -> Self {
367        MultilineValue::Indices(TinyVec::from_single(segment.into()))
368    }
369}
370
371impl From<TinyVec<Index>> for MultilineValue {
372    fn from(indices: TinyVec<Index>) -> Self {
373        MultilineValue::Indices(indices)
374    }
375}
376
//   }}} MultilineValue
378
379// }}} Value
380
381// Segment {{{
382
383//   Lines {{{
384
385/// An enum represents a collection of segments that can be either a collection of [`Segment`] or
386/// a single [`String`].
387/// Lines is used for representing lines of block elements that can be represented by multiple lines,
388/// such as HTML blocks.
389#[derive(Debug, Clone, Default)]
390#[non_exhaustive]
391pub enum Lines {
392    #[default]
393    Empty,
394    Segments(Vec<Segment>),
395    String(String),
396}
397
398impl Lines {
399    /// Creates a [`Lines`] from a collection of segments.
400    pub fn from_segments(segments: Vec<Segment>) -> Self {
401        Lines::Segments(segments)
402    }
403
404    /// Creates a [`Lines`] from a string.
405    pub fn from_string(s: String) -> Self {
406        Lines::String(s)
407    }
408
409    /// Returns an iterator that iterates over the lines of this [`Lines`] as str.
410    pub fn iter<'a>(&'a self, source: &'a str) -> impl Iterator<Item = Cow<'a, str>> {
411        LinesIter::new(
412            match self {
413                Lines::Empty => LinesIterState::Empty,
414                Lines::Segments(segments) => LinesIterState::Segments(segments.iter()),
415                Lines::String(s) => LinesIterState::String(s.split_inclusive('\n')),
416            },
417            source,
418        )
419    }
420}
421
422impl From<String> for Lines {
423    fn from(s: String) -> Self {
424        Lines::String(s)
425    }
426}
427
428impl From<&String> for Lines {
429    fn from(s: &String) -> Self {
430        Lines::String(s.clone())
431    }
432}
433
434impl From<&str> for Lines {
435    fn from(s: &str) -> Self {
436        Lines::String(String::from(s))
437    }
438}
439
440impl From<&[u8]> for Lines {
441    fn from(s: &[u8]) -> Self {
442        Lines::String(String::from_utf8_lossy(s).into_owned())
443    }
444}
445
446impl From<Vec<Segment>> for Lines {
447    fn from(segments: Vec<Segment>) -> Self {
448        Lines::Segments(segments)
449    }
450}
451
452impl From<&[Segment]> for Lines {
453    fn from(segments: &[Segment]) -> Self {
454        Lines::Segments(segments.to_vec())
455    }
456}
457
/// Internal iteration state of [`LinesIter`], one variant per [`Lines`] variant.
enum LinesIterState<'a> {
    /// Nothing to yield.
    Empty,
    /// Iterates over stored segments.
    Segments(core::slice::Iter<'a, Segment>),
    /// Iterates over the pieces of an owned string, split with `split_inclusive`.
    String(core::str::SplitInclusive<'a, char>),
}
463
464/// Iterator that iterates over [`Lines`] as str
465struct LinesIter<'a> {
466    state: LinesIterState<'a>,
467    source: &'a str,
468}
469
470impl<'a> LinesIter<'a> {
471    /// Creates a new LinesIter with the given lines and source.
472    pub fn new(state: LinesIterState<'a>, source: &'a str) -> Self {
473        LinesIter { state, source }
474    }
475}
476
477impl<'a> Iterator for LinesIter<'a> {
478    type Item = Cow<'a, str>;
479
480    #[inline(always)]
481    fn next(&mut self) -> Option<Self::Item> {
482        match &mut self.state {
483            LinesIterState::Empty => None,
484            LinesIterState::Segments(iter) => iter.next().map(|segment| segment.str(self.source)),
485            LinesIterState::String(iter) => iter.next().map(Cow::Borrowed),
486        }
487    }
488}
489
490//   }}} Lines
491
492//   Block {{{
493
/// Special collection of segments.
/// Each segment represents exactly one line;
/// a segment never spans multiple lines.
pub type Block = [Segment];
498
499fn binary_search_block_pos(block: &Block, pos: usize) -> Option<usize> {
500    let mut left = 0;
501    let mut right = block.len();
502    while left < right {
503        let mid = (left + right) / 2;
504        if block[mid].start() <= pos && pos < block[mid].stop() {
505            return Some(mid);
506        }
507        if pos < block[mid].start() {
508            right = mid;
509        } else {
510            left = mid + 1;
511        }
512    }
513    None
514}
515
/// Extension trait for [`Block`].
pub trait BlockExt {
    /// Converts the segments of the block into a single [`MultilineValue`].
    fn to_values(&self) -> MultilineValue;
}
521
522impl BlockExt for Block {
523    fn to_values(&self) -> MultilineValue {
524        let first = self.first();
525        let second = self.get(1);
526        if let Some(f) = first {
527            if second.is_none() {
528                return MultilineValue::from_index((f.start(), f.stop()).into());
529            }
530        } else {
531            return MultilineValue::default();
532        }
533        let mut result = Vec::with_capacity(self.len());
534        for v in self.iter() {
535            result.push((v.start(), v.stop()).into());
536        }
537        MultilineValue::from_indices(result)
538    }
539}
540
541pub(crate) fn block_to_values(i: impl IntoIterator<Item = Segment>) -> MultilineValue {
542    let mut b = i.into_iter();
543    let first = b.next();
544    let second = b.next();
545    if let Some(f) = first {
546        if second.is_none() {
547            return MultilineValue::from_index(f.into());
548        }
549    } else {
550        return MultilineValue::default();
551    }
552    let mut result = Vec::with_capacity(2 + b.size_hint().0);
553    result.push(first.unwrap().into());
554    result.push(second.unwrap().into());
555    for segment in b {
556        result.push(segment.into());
557    }
558    MultilineValue::from_indices(result)
559}
560
561//   }}} Block
562
563//   Segment {{{
564
565/// A Segment struct repsents a segment of CommonMark text.
566/// In addition to [`Index`], Segment has padding and force_newline fields.
567#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
568pub struct Segment {
569    start: usize,
570
571    stop: usize,
572
573    padding: u8,
574
575    force_newline: bool,
576}
577
578impl Segment {
579    /// Creates a [`Segment`] with start and stop.
580    pub fn new(start: usize, stop: usize) -> Self {
581        Segment {
582            start,
583            stop,
584            padding: 0,
585            force_newline: false,
586        }
587    }
588
589    /// Create a Segment with start, stop, and padding.
590    pub fn new_with_padding(start: usize, stop: usize, padding: usize) -> Self {
591        Segment {
592            start,
593            stop,
594            padding: padding as u8,
595            force_newline: false,
596        }
597    }
598
599    /// A Start position of the segment.
600    #[inline(always)]
601    pub fn start(&self) -> usize {
602        self.start
603    }
604
605    /// A Stop position of the segment.
606    #[inline(always)]
607    pub fn stop(&self) -> usize {
608        self.stop
609    }
610
611    /// A Padding length of the segment.
612    /// In CommonMark, Tab width is varied corresponding to horizontal position.
613    /// So, padding is used to represent the number of leading spaces that should be inserted
614    /// to align the text.
615    #[inline(always)]
616    pub fn padding(&self) -> usize {
617        self.padding as usize
618    }
619
620    /// A Force newline flag of the segment.
621    #[inline(always)]
622    pub fn force_newline(&self) -> bool {
623        self.force_newline
624    }
625
626    /// Returns the bytes of the segment from the source.
627    pub fn bytes<'a>(&self, source: &'a str) -> Cow<'a, [u8]> {
628        if self.padding == 0
629            && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
630        {
631            Cow::Borrowed(&source.as_bytes()[self.start..self.stop])
632        } else {
633            let mut result = Vec::with_capacity(self.padding() + self.stop - self.start + 1);
634            result.extend(core::iter::repeat_n(SPACE[0], self.padding()));
635            result.extend_from_slice(&source.as_bytes()[self.start..self.stop]);
636            if self.force_newline && !result.is_empty() && *result.last().unwrap() != b'\n' {
637                result.push(b'\n');
638            }
639            Cow::Owned(result)
640        }
641    }
642
643    /// Returns the str of the segment from the source as a string.
644    ///
645    /// # Safety
646    /// This method does not check the validity of UTF-8 boundaries.
647    pub fn str<'a>(&self, source: &'a str) -> Cow<'a, str> {
648        if self.padding == 0
649            && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
650        {
651            unsafe { Cow::Borrowed(source.get_unchecked(self.start..self.stop)) }
652        } else {
653            let mut result = String::with_capacity(self.padding() + self.stop - self.start + 1);
654            result.extend(core::iter::repeat_n(' ', self.padding()));
655            unsafe { result.push_str(source.get_unchecked(self.start..self.stop)) };
656            if self.force_newline && !result.is_empty() && result.as_bytes().last() != Some(&b'\n')
657            {
658                result.push('\n');
659            }
660            Cow::Owned(result)
661        }
662    }
663
664    /// Returns the length of the segment.
665    #[inline(always)]
666    pub fn len(&self) -> usize {
667        self.stop - self.start + self.padding()
668    }
669
670    /// Returns a segment between this segment and the given segment.
671    pub fn between(&self, other: Segment) -> Segment {
672        if self.stop != other.stop {
673            panic!("invalid state");
674        }
675        Segment::new_with_padding(
676            self.start,
677            other.start,
678            (self.padding - other.padding) as usize,
679        )
680    }
681
682    /// Returns true if this segment is empty, otherwise false.
683    #[inline(always)]
684    pub fn is_empty(&self) -> bool {
685        self.start >= self.stop && self.padding == 0
686    }
687
688    /// Returns true if this segment is blank (only space characters), otherwise false.
689    pub fn is_blank(&self, source: &str) -> bool {
690        let v = &source.as_bytes()[self.start..self.stop];
691        is_blank(v)
692    }
693
694    /// Returns a new segment by slicing off all trailing space characters.
695    pub fn trim_right_space(&self, source: &str) -> Segment {
696        let v = &source.as_bytes()[self.start..self.stop];
697        let l = util::trim_right_space_length(v);
698        if l == v.len() {
699            Segment::new(self.start, self.start)
700        } else {
701            Segment::new_with_padding(self.start, self.stop - l, self.padding as usize)
702        }
703    }
704
705    /// Returns a new segment by slicing off all leading space characters including padding.
706    pub fn trim_left_space(&self, source: &str) -> Segment {
707        let v = &source.as_bytes()[self.start..self.stop];
708        let l = util::trim_left_space_length(v);
709        Segment::new(self.start + l, self.stop)
710    }
711
712    /// Returns a new segment by slicing off leading space
713    /// characters until the given width.
714    pub fn trim_left_space_width(&self, mut width: isize, source: &str) -> Segment {
715        let mut padding = self.padding as isize;
716        while width > 0 && padding > 0 {
717            width -= 1;
718            padding -= 1;
719        }
720        if width == 0 {
721            return Segment::new_with_padding(self.start, self.stop, padding as usize);
722        }
723        let v = &source.as_bytes()[self.start..self.stop];
724        let mut start = self.start;
725        for &c in v {
726            if start >= self.stop - 1 || width == 0 {
727                break;
728            }
729            if c == b' ' {
730                width -= 1;
731            } else if c == b'\t' {
732                width -= 4;
733            } else {
734                break;
735            }
736            start += 1;
737        }
738        if width < 0 {
739            padding = -width;
740        }
741        Segment::new_with_padding(start, self.stop, padding as usize)
742    }
743
744    /// Returns a new Segment with same value except `start`.
745    #[inline(always)]
746    pub fn with_start(&self, v: usize) -> Segment {
747        Segment::new_with_padding(v, self.stop, self.padding as usize)
748    }
749
750    /// Returns a new Segment with same value except `stop`.
751    #[inline(always)]
752    pub fn with_stop(&self, v: usize) -> Segment {
753        Segment::new_with_padding(self.start, v, self.padding as usize)
754    }
755
756    /// Returns a new Segment with padding set to given value.
757    #[inline(always)]
758    pub fn with_padding(&self, v: usize) -> Segment {
759        Segment::new_with_padding(self.start, self.stop, v)
760    }
761
762    /// Returns a new Segment with force_newline set to `v`.
763    #[inline(always)]
764    pub fn with_force_newline(&self, v: bool) -> Segment {
765        Segment {
766            start: self.start,
767            stop: self.stop,
768            padding: self.padding,
769            force_newline: v,
770        }
771    }
772
773    /// Returns an Index with same start and stop as this segment.
774    #[inline(always)]
775    pub fn to_index(&self) -> Index {
776        Index::new(self.start, self.stop)
777    }
778}
779
780impl From<(usize, usize)> for Segment {
781    fn from((start, stop): (usize, usize)) -> Self {
782        Segment::new(start, stop)
783    }
784}
785
786impl From<(usize, usize, usize)> for Segment {
787    fn from((start, stop, padding): (usize, usize, usize)) -> Self {
788        Segment::new_with_padding(start, stop, padding)
789    }
790}
791
792impl From<Index> for Segment {
793    fn from(index: Index) -> Self {
794        Segment::new(index.start(), index.stop())
795    }
796}
797
798impl From<Segment> for Range<usize> {
799    fn from(segment: Segment) -> Self {
800        segment.start()..segment.stop()
801    }
802}
803
804//   }}} Segment
805
806// }}} Segment
807
808// Reader {{{
809
/// Sentinel byte that indicates the end of the string.
pub const EOS: u8 = 0xff;
812
813/// A Reader trait represents a reader that can read and peek bytes.
814pub trait Reader<'a> {
815    /// Returns the source str.
816    fn source(&self) -> &'a str;
817
818    /// Returns current line number and position.
819    fn position(&self) -> (usize, Segment);
820
821    /// Resets the internal pointer to the beginning of the source.
822    fn reset_position(&mut self);
823
824    /// Sets current line number and position.
825    fn set_position(&mut self, line: usize, pos: Segment);
826
827    /// Sets padding to the reader.
828    fn set_padding(&mut self, padding: usize);
829
830    /// Reads the next byte without advancing the position.
831    /// Returns [`EOS`] if the end of the source is reached.
832    fn peek_byte(&self) -> u8;
833
834    /// Reads the next line segment without advancing the position.
835    /// Returns None if the end of the source is reached.
836    fn peek_line_segment(&self) -> Option<Segment>;
837
838    /// Reads the next line without advancing the position.
839    /// Returns None if the end of the source is reached.
840    fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)>;
841
842    /// Reads the next line without advancing the position.
843    /// Returns None if the end of the source is reached.
844    fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)>;
845
846    /// Advances the internal pointer.
847    fn advance(&mut self, n: usize);
848
849    /// Advances the internal pointer and add padding to the
850    /// reader.
851    fn advance_and_set_padding(&mut self, n: usize, padding: usize);
852
853    /// Advances the internal pointer to the next line head.
854    fn advance_line(&mut self);
855
856    /// Advances the internal pointer to the end of line.
857    /// If the line ends with a newline, it will be included in the segment.
858    /// If the line ends with EOF, it will not be included in the segment.
859    fn advance_to_eol(&mut self);
860
861    /// Returns a distance from the line head to current position.
862    fn line_offset(&mut self) -> usize;
863
864    /// Returns a character just before current internal pointer.
865    fn precending_charater(&self) -> char;
866
867    /// Skips blank lines and advances the internal pointer to the next non-blank line.
868    /// Returns None if the end of the source is reached.
869    fn skip_blank_lines(&mut self) -> Option<(Cow<'a, [u8]>, Segment)> {
870        loop {
871            match self.peek_line_bytes() {
872                None => return None,
873                Some((line, seg)) => {
874                    if is_blank(&line) {
875                        self.advance_line();
876                        continue;
877                    }
878                    return Some((line, seg));
879                }
880            }
881        }
882    }
883
884    /// Skips bytes while the given function returns true.
885    fn skip_while<F>(&mut self, mut f: F) -> usize
886    where
887        F: FnMut(u8) -> bool,
888    {
889        let mut i = 0usize;
890        loop {
891            let b = self.peek_byte();
892            if b == EOS {
893                break;
894            }
895            if f(b) {
896                i += 1;
897                self.advance(1);
898                continue;
899            }
900            break;
901        }
902        i
903    }
904
905    /// Skips space characters.
906    fn skip_spaces(&mut self) -> usize {
907        self.skip_while(is_space)
908    }
909}
910
911//   BasicReader {{{
912
913/// [`Reader`] implementation for byte slices.
914pub struct BasicReader<'a> {
915    source: &'a str,
916    bsource: &'a [u8],
917    source_length: usize,
918    line: Option<usize>,
919    pos: Segment,
920    head: usize,
921    line_offset: Option<usize>,
922}
923
924impl<'a> BasicReader<'a> {
925    /// Creates a new BasicReader with the given source.
926    pub fn new(source: &'a str) -> Self {
927        let bsource: &[u8] = source.as_bytes();
928        let source_length = bsource.len();
929        let mut b = BasicReader {
930            source,
931            bsource,
932            source_length,
933            line: None,
934            pos: Segment::new(0, 0),
935            head: 0,
936            line_offset: None,
937        };
938        b.reset_position();
939        b
940    }
941
942    /// Creates a new BasicReader with the given byte slice without UTF-8 validation.
943    ///
944    /// # Safety
945    /// - The caller must ensure that the given byte slice is valid UTF-8.
946    pub unsafe fn new_unchecked(source: &'a [u8]) -> Self {
947        Self::new(core::str::from_utf8_unchecked(source))
948    }
949}
950
951impl<'a> Reader<'a> for BasicReader<'a> {
952    fn source(&self) -> &'a str {
953        self.source
954    }
955
956    fn position(&self) -> (usize, Segment) {
957        (self.line.unwrap_or(0), self.pos)
958    }
959
960    fn reset_position(&mut self) {
961        self.line = None;
962        self.head = 0;
963        self.line_offset = None;
964        self.advance_line();
965    }
966
967    fn set_position(&mut self, line: usize, pos: Segment) {
968        self.line = Some(line);
969        self.pos = pos;
970        self.head = pos.start;
971        self.line_offset = None;
972    }
973
974    fn set_padding(&mut self, padding: usize) {
975        self.pos.padding = padding as u8;
976    }
977
978    fn peek_byte(&self) -> u8 {
979        if self.source_length == 0 {
980            return EOS;
981        }
982        if self.pos.padding() != 0 {
983            return SPACE[0];
984        }
985        if self.pos.start() < self.source_length {
986            return self.bsource[self.pos.start()];
987        }
988        EOS
989    }
990
991    fn peek_line_segment(&self) -> Option<Segment> {
992        if self.source_length == 0 {
993            return None;
994        }
995        if self.pos.start() < self.source_length {
996            return Some(self.pos);
997        }
998        None
999    }
1000
1001    fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
1002        if self.source_length == 0 {
1003            return None;
1004        }
1005        if self.pos.start() < self.source_length {
1006            return Some((self.pos.bytes(self.source), self.pos));
1007        }
1008        None
1009    }
1010
1011    fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
1012        if self.source_length == 0 {
1013            return None;
1014        }
1015        if self.pos.start() < self.source_length {
1016            return Some((self.pos.str(self.source), self.pos));
1017        }
1018        None
1019    }
1020
1021    fn advance(&mut self, n: usize) {
1022        if self.source_length == 0 {
1023            return;
1024        }
1025
1026        self.line_offset = None;
1027        if n < self.pos.len() && self.pos.padding() == 0 {
1028            self.pos.start += n;
1029            return;
1030        }
1031        let mut n = n;
1032        while n > 0 && self.pos.start < self.source_length {
1033            if self.pos.padding != 0 {
1034                self.pos.padding -= 1;
1035                n -= 1;
1036                continue;
1037            }
1038            if self.bsource[self.pos.start] == b'\n' {
1039                self.advance_line();
1040                n -= 1;
1041                continue;
1042            }
1043
1044            self.pos.start += 1;
1045            n -= 1;
1046        }
1047    }
1048
1049    fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
1050        self.advance(n);
1051        if padding > self.pos.padding() {
1052            self.set_padding(padding);
1053        }
1054    }
1055
1056    fn advance_line(&mut self) {
1057        self.line_offset = None;
1058        if self.source_length == 0 || self.pos.start >= self.source_length {
1059            return;
1060        }
1061
1062        if self.line.is_some() {
1063            self.pos.start = self.pos.stop;
1064            if self.pos.start >= self.source_length {
1065                return;
1066            }
1067            self.pos.stop = self.source_length;
1068            if self.bsource[self.pos.start] != b'\n' {
1069                if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
1070                    self.pos.stop = self.pos.start + i + 1;
1071                }
1072            } else {
1073                self.pos.stop = self.pos.start + 1;
1074            }
1075            self.line = Some(self.line.unwrap() + 1);
1076        } else {
1077            if let Some(i) = memchr(b'\n', self.bsource) {
1078                self.pos = (0, i + 1).into();
1079            } else {
1080                self.pos = (0, self.source_length).into();
1081            }
1082            self.line = Some(0);
1083        }
1084        self.head = self.pos.start;
1085        self.pos.padding = 0;
1086    }
1087
1088    fn advance_to_eol(&mut self) {
1089        if self.source_length == 0 || self.pos.start >= self.source_length {
1090            return;
1091        }
1092
1093        self.line_offset = None;
1094        if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
1095            self.pos.start += i;
1096        } else {
1097            self.pos.start = self.source_length;
1098        }
1099        self.pos.padding = 0;
1100    }
1101
1102    fn line_offset(&mut self) -> usize {
1103        if self.line_offset.is_none() {
1104            let mut v = 0;
1105            for i in self.head..self.pos.start {
1106                if self.bsource[i] == b'\t' {
1107                    v += util::tab_width(v);
1108                } else {
1109                    v += 1;
1110                }
1111            }
1112            v -= self.pos.padding();
1113            self.line_offset = Some(v);
1114        }
1115        self.line_offset.unwrap_or(0)
1116    }
1117
1118    fn precending_charater(&self) -> char {
1119        if self.pos.padding() != 0 {
1120            return ' ';
1121        }
1122        if self.pos.start() == 0 {
1123            return '\n';
1124        }
1125        let mut i = self.pos.start() - 1;
1126        loop {
1127            if let Some(l) = utf8_len(self.bsource[i]) {
1128                if l == 1 {
1129                    return self.bsource[i] as char;
1130                }
1131                return str::from_utf8(&self.bsource[i..i + l])
1132                    .ok()
1133                    .and_then(|s| s.chars().next())
1134                    .unwrap_or('\u{FFFD}');
1135            }
1136            i -= 1;
1137            if i == 0 {
1138                break;
1139            }
1140        }
1141        '\u{FFFD}'
1142    }
1143}
1144
1145//   }}} BasicReader
1146
1147//   BlockReader {{{
1148
/// [`Reader`] implementation for given blocks.
pub struct BlockReader<'a> {
    /// The source text.
    source: &'a str,
    /// `source` viewed as bytes.
    bsource: &'a [u8],
    /// The lines (segments of `source`) this reader walks over.
    block: &'a Block,
    /// Index into `block` of the current line; `None` until a line is entered.
    line: Option<usize>,
    /// The current position: the not yet consumed part of the current line.
    pos: Segment,
    /// Start offset of the current line's segment; origin for `line_offset`.
    head: usize,
    /// Stop offset of the last segment in `block` (0 for an empty block).
    last: usize,
    /// Cached result of `line_offset`; `None` when it must be recomputed.
    line_offset: Option<usize>,
}
1160
1161impl<'a> BlockReader<'a> {
1162    /// Creates a new BlockReader with the given source and block.
1163    pub fn new(source: &'a str, block: &'a Block) -> Self {
1164        let mut b = BlockReader {
1165            source,
1166            bsource: source.as_bytes(),
1167            block,
1168            line: None,
1169            pos: Segment::new(0, 0),
1170            head: 0,
1171            last: 0,
1172            line_offset: None,
1173        };
1174        b.reset(block);
1175        b
1176    }
1177
1178    /// Creates a new BlockReader with the given byte slice without UTF-8 validation.
1179    ///
1180    /// # Safety
1181    /// - The caller must ensure that the given byte slice is valid UTF-8.
1182    pub unsafe fn new_unchecked(source: &'a [u8], block: &'a Block) -> Self {
1183        Self::new(core::str::from_utf8_unchecked(source), block)
1184    }
1185
1186    /// Resets the reader with given new block.
1187    pub fn reset(&mut self, lines: &'a Block) {
1188        self.block = lines;
1189        self.reset_position();
1190    }
1191
1192    /// Returns Values that contains value between the current position and the given
1193    /// position.
1194    pub fn between_current(&mut self, line: usize, pos: Segment) -> MultilineValue {
1195        if line == self.line.unwrap_or(0) {
1196            let seg = self.block[line];
1197            if pos.start() >= seg.start() && self.pos.start() <= seg.stop() {
1198                return block_to_values(BetweenBlockIterator::single(
1199                    pos.start()..self.pos.start(),
1200                ));
1201            }
1202        }
1203        block_to_values(BetweenBlockIterator::multi(
1204            BlockReader {
1205                source: self.source,
1206                bsource: self.bsource,
1207                block: self.block,
1208                line: self.line,
1209                pos: self.pos,
1210                head: self.head,
1211                last: self.last,
1212                line_offset: self.line_offset,
1213            },
1214            line,
1215            pos,
1216        ))
1217    }
1218
1219    /// Returns Values that contains segments between the given range.
1220    pub fn between(&self, range: Range<usize>) -> MultilineValue {
1221        let from_line = binary_search_block_pos(self.block, range.start).unwrap_or(0);
1222        let mut from_pos = self.block[from_line];
1223        if range.start >= from_pos.start() && range.end <= from_pos.stop() {
1224            return block_to_values(BetweenBlockIterator::single(range));
1225        }
1226        let to_line =
1227            binary_search_block_pos(self.block, range.end).unwrap_or(self.block.len() - 1);
1228        let mut to_pos = self.block[to_line];
1229        to_pos.start = range.end;
1230        from_pos.start = range.start;
1231
1232        block_to_values(BetweenBlockIterator::multi(
1233            BlockReader {
1234                source: self.source,
1235                bsource: self.bsource,
1236                block: self.block,
1237                line: Some(to_line),
1238                pos: to_pos,
1239                head: 0,
1240                last: 0,
1241                line_offset: None,
1242            },
1243            from_line,
1244            from_pos,
1245        ))
1246    }
1247}
1248
/// State of a [`BetweenBlockIterator`] that walks a range with a reader.
struct MultilineBetweenBlock<'a> {
    /// Reader used to walk the lines; it is placed at the start of the range.
    reader: BlockReader<'a>,
    /// Line on which the range starts.
    start_line: usize,
    /// Position at which the range starts.
    start_pos: Segment,
    /// Line on which the range ends (the reader's line when the iterator was created).
    current_line: usize,
    /// Position at which the range ends (the reader's position when the iterator was created).
    current_pos: Segment,
}
1256
/// Iterator over the segments that make up a range of a block.
struct BetweenBlockIterator<'a> {
    /// Set when the range is walked line by line with a reader.
    multi: Option<MultilineBetweenBlock<'a>>,
    /// Set when the range is yielded as one segment.
    single: Option<Range<usize>>,
    /// Becomes `true` once the final segment has been yielded.
    done: bool,
}
1262
1263impl<'a> BetweenBlockIterator<'a> {
1264    fn multi(mut reader: BlockReader<'a>, line: usize, pos: Segment) -> BetweenBlockIterator<'a> {
1265        let (current_line, current_pos) = reader.position();
1266        reader.set_position(line, pos);
1267        BetweenBlockIterator {
1268            multi: Some(MultilineBetweenBlock {
1269                reader,
1270                start_line: line,
1271                start_pos: pos,
1272                current_line,
1273                current_pos,
1274            }),
1275            single: None,
1276            done: false,
1277        }
1278    }
1279
1280    fn single(range: Range<usize>) -> BetweenBlockIterator<'a> {
1281        BetweenBlockIterator {
1282            multi: None,
1283            single: Some(range),
1284            done: false,
1285        }
1286    }
1287}
1288
1289impl<'a> Iterator for BetweenBlockIterator<'a> {
1290    type Item = Segment;
1291
1292    fn next(&mut self) -> Option<Self::Item> {
1293        if self.done {
1294            return None;
1295        }
1296        if let Some(s) = &self.single {
1297            self.done = true;
1298            return Some((s.start, s.end).into());
1299        }
1300        if let Some(m) = &mut self.multi {
1301            let (ln, _) = m.reader.position();
1302            let (_, segment) = m.reader.peek_line_bytes()?;
1303            let start = if ln == m.start_line {
1304                m.start_pos.start()
1305            } else {
1306                segment.start()
1307            };
1308            let stop = if ln == m.current_line {
1309                m.current_pos.start()
1310            } else {
1311                segment.stop()
1312            };
1313            let seg = Segment::new(start, stop);
1314            if ln == m.current_line {
1315                m.reader.advance(stop - start);
1316                self.done = true;
1317            }
1318            m.reader.advance_line();
1319            return Some(seg);
1320        }
1321        None
1322    }
1323}
1324
1325impl<'a> Reader<'a> for BlockReader<'a> {
1326    fn source(&self) -> &'a str {
1327        self.source
1328    }
1329
1330    fn position(&self) -> (usize, Segment) {
1331        (self.line.unwrap_or(0), self.pos)
1332    }
1333
1334    fn reset_position(&mut self) {
1335        self.line = None;
1336        self.head = 0;
1337        self.last = 0;
1338        self.line_offset = None;
1339        self.pos.start = 0;
1340        self.pos.stop = 0;
1341        self.pos.padding = 0;
1342        self.pos.force_newline = false;
1343        if let Some(l) = self.block.last() {
1344            self.last = l.stop;
1345        }
1346        self.advance_line();
1347    }
1348
1349    fn set_position(&mut self, line: usize, pos: Segment) {
1350        self.line_offset = None;
1351        self.line = Some(line);
1352        self.pos = pos;
1353        if line < self.block.len() {
1354            self.head = self.block[line].start;
1355        }
1356    }
1357
1358    fn set_padding(&mut self, padding: usize) {
1359        self.line_offset = None;
1360        self.pos.padding = padding as u8;
1361    }
1362
1363    fn peek_byte(&self) -> u8 {
1364        if self.bsource.is_empty() || self.block.is_empty() {
1365            return EOS;
1366        }
1367        if self.pos.padding() != 0 {
1368            return SPACE[0];
1369        }
1370        let l = self.line.unwrap();
1371        if self.pos.is_empty() {
1372            if l < self.block.len() - 1 {
1373                let next = &self.block[l + 1];
1374                if next.padding() != 0 {
1375                    return SPACE[0];
1376                }
1377                if next.start < self.bsource.len() {
1378                    return self.bsource[next.start];
1379                }
1380            }
1381            return EOS;
1382        } else if self.pos.start < self.bsource.len() {
1383            return self.bsource[self.pos.start];
1384        }
1385        EOS
1386    }
1387
1388    fn peek_line_segment(&self) -> Option<Segment> {
1389        if self.bsource.is_empty() || self.block.is_empty() {
1390            return None;
1391        }
1392        let l = self.line.unwrap();
1393        if self.pos.is_empty() {
1394            if l < self.block.len() - 1 {
1395                let s = self.block[l + 1].start;
1396                if s < self.bsource.len() {
1397                    return Some(self.block[l + 1]);
1398                }
1399            }
1400            return None;
1401        } else if self.pos.start < self.bsource.len() {
1402            return Some(self.pos);
1403        }
1404        None
1405    }
1406
1407    fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
1408        if self.bsource.is_empty() || self.block.is_empty() {
1409            return None;
1410        }
1411        let l = self.line.unwrap();
1412        if self.pos.is_empty() {
1413            if l < self.block.len() - 1 {
1414                let s = self.block[l + 1].start;
1415                if s < self.bsource.len() {
1416                    return Some((self.block[l + 1].bytes(self.source), self.block[l + 1]));
1417                }
1418            }
1419            return None;
1420        } else if self.pos.start < self.bsource.len() {
1421            return Some((self.pos.bytes(self.source), self.pos));
1422        }
1423        None
1424    }
1425
1426    fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
1427        if self.bsource.is_empty() || self.block.is_empty() {
1428            return None;
1429        }
1430        let l = self.line.unwrap();
1431        if self.pos.is_empty() {
1432            if l < self.block.len() - 1 {
1433                let s = self.block[l + 1].start;
1434                if s < self.bsource.len() {
1435                    return Some((self.block[l + 1].str(self.source), self.block[l + 1]));
1436                }
1437            }
1438            return None;
1439        } else if self.pos.start < self.bsource.len() {
1440            return Some((self.pos.str(self.source), self.pos));
1441        }
1442        None
1443    }
1444
1445    fn advance(&mut self, n: usize) {
1446        if self.bsource.is_empty() || self.block.is_empty() {
1447            return;
1448        }
1449        self.line_offset = None;
1450        if n < self.pos.len() && self.pos.padding() == 0 {
1451            self.pos.start += n;
1452            return;
1453        }
1454        let mut n = n;
1455        while n > 0 && self.pos.start < self.last {
1456            if self.pos.padding != 0 {
1457                self.pos.padding -= 1;
1458                n -= 1;
1459                continue;
1460            }
1461            if self.pos.start >= self.pos.stop - 1 && self.pos.stop < self.last {
1462                self.advance_line();
1463                n -= 1;
1464                continue;
1465            }
1466
1467            self.pos.start += 1;
1468            n -= 1;
1469        }
1470    }
1471
1472    fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
1473        self.advance(n);
1474        if padding > self.pos.padding() {
1475            self.set_padding(padding);
1476        }
1477    }
1478
1479    fn advance_line(&mut self) {
1480        if self.bsource.is_empty() || self.block.is_empty() {
1481            return;
1482        }
1483        let l = match self.line {
1484            Some(l) => l + 1,
1485            None => 0,
1486        };
1487        if l < self.block.len() {
1488            self.set_position(l, self.block[l]);
1489        } else {
1490            self.pos.start = self.source().len();
1491            self.pos.stop = self.pos.start;
1492            self.pos.padding = 0;
1493        }
1494    }
1495
1496    fn advance_to_eol(&mut self) {
1497        if self.bsource.is_empty() || self.block.is_empty() {
1498            return;
1499        }
1500        self.line_offset = None;
1501        let c = self.bsource[self.pos.stop - 1];
1502        if c == b'\n' {
1503            self.pos.start = self.pos.stop - 1;
1504        } else {
1505            self.pos.start = self.pos.stop;
1506        }
1507    }
1508
1509    fn line_offset(&mut self) -> usize {
1510        if self.bsource.is_empty() || self.block.is_empty() {
1511            return 0;
1512        }
1513        if self.line_offset.is_none() {
1514            let mut v = 0;
1515            for i in self.head..self.pos.start {
1516                if self.bsource[i] == b'\t' {
1517                    v += util::tab_width(v);
1518                } else {
1519                    v += 1;
1520                }
1521            }
1522            v -= self.pos.padding();
1523            self.line_offset = Some(v);
1524        }
1525        self.line_offset.unwrap_or(0)
1526    }
1527
1528    fn precending_charater(&self) -> char {
1529        if self.pos.padding() != 0 {
1530            return ' ';
1531        }
1532        if self.pos.start() == 0 {
1533            return '\n';
1534        }
1535        if self.block.is_empty() {
1536            return '\n';
1537        }
1538        let first_line = &self.block[0];
1539        if self.line.unwrap_or(0) == 0 && self.pos.start() <= first_line.start() {
1540            return '\n';
1541        }
1542
1543        let mut i = self.pos.start() - 1;
1544        loop {
1545            if let Some(l) = utf8_len(self.bsource[i]) {
1546                if l == 1 {
1547                    return self.bsource[i] as char;
1548                }
1549                return str::from_utf8(&self.bsource[i..i + l])
1550                    .ok()
1551                    .and_then(|s| s.chars().next())
1552                    .unwrap_or('\u{FFFD}');
1553            }
1554            i -= 1;
1555            if i == 0 {
1556                break;
1557            }
1558        }
1559        if i == 0 {
1560            return '\n';
1561        }
1562        '\u{FFFD}'
1563    }
1564}
1565//   }}} BlockReader
1566
1567// }}} Reader
1568
1569// Tests {{{
1570
#[cfg(test)]
mod tests {
    use super::*;

    #[allow(unused_imports)]
    #[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
    use crate::println;

    /// A segment resolves to its bytes, with padding rendered as spaces.
    #[test]
    fn test_segment() {
        let buffer = "Hello, world!";

        let plain: Segment = (0, 5).into();
        let bytes = plain.bytes(buffer);
        assert_eq!(&bytes[..], b"Hello");

        let padded = Segment::new_with_padding(0, 5, 3);
        let bytes = padded.bytes(buffer);
        assert_eq!(&bytes[..], b"   Hello");
    }

    /// A value resolves the same way whether it is an index or a string.
    #[test]
    fn test_raw() {
        let buffer = "Hello, world!";

        let index = Value::from((0, 5));
        assert_eq!(index.bytes(buffer), b"Hello");

        let raw_string = Value::from("Hello");
        assert_eq!(raw_string.bytes(buffer), b"Hello");

        assert_eq!(index.str(buffer), "Hello");

        let string = String::from("Hello");
        let v = Value::from(string.as_str());
        assert_eq!(v.str(buffer), "Hello");
    }

    /// Walks a two-line source with a `BasicReader`.
    #[test]
    fn test_bytes_reader() {
        let buffer = "Hello, world!\nThis is a test.\n";
        let mut reader = BasicReader::new(buffer);
        assert_eq!(reader.peek_byte(), b'H');

        let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
        assert_eq!(line.as_ref(), b"Hello, world!\n");
        assert_eq!(segment.start(), 0);
        assert_eq!(segment.stop(), 14);

        reader.advance(7);
        assert_eq!(reader.peek_byte(), b'w');

        reader.advance_line();
        assert_eq!(reader.peek_byte(), b'T');

        let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
        assert_eq!(line.as_ref(), b"This is a test.\n");
        assert_eq!(segment.start(), 14);
        assert_eq!(segment.stop(), 30);

        reader.advance(100); // Advance beyond the end
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// A `BasicReader` over an empty source always reports the end.
    #[test]
    fn test_bytes_reader_empty() {
        let buffer = "";
        let mut reader = BasicReader::new(buffer);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance(10);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance_line();
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// Walks a two-line block whose second line carries padding.
    #[test]
    fn test_block_reader() {
        let buffer = "Hello, world!\nThis is a test.\n";
        let lines = [Segment::new(0, 14), Segment::new_with_padding(14, 30, 2)];
        let mut reader = BlockReader::new(buffer, &lines);
        assert_eq!(reader.peek_byte(), b'H');

        let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
        assert_eq!(line.as_ref(), b"Hello, world!\n");
        assert_eq!(segment.start(), 0);
        assert_eq!(segment.stop(), 14);

        reader.advance(13);
        assert_eq!(reader.peek_byte(), b'\n');

        reader.advance(1);
        assert_eq!(reader.peek_byte(), SPACE[0]);

        let (line, segment) = reader.peek_line_bytes().expect("Expected a line");
        assert_eq!(line.as_ref(), b"  This is a test.\n");
        assert_eq!(segment.start(), 14);
        assert_eq!(segment.stop(), 30);
        assert_eq!(segment.padding(), 2);

        reader.advance(3);
        assert_eq!(reader.peek_byte(), b'h');

        reader.advance(100); // Advance beyond the end
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }

    /// A `BlockReader` over an empty source and block always reports the end.
    #[test]
    fn test_block_reader_empty() {
        let buffer = "";
        let lines: [Segment; 0] = [];
        let mut reader = BlockReader::new(buffer, &lines);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance(10);
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());

        reader.advance_line();
        assert_eq!(reader.peek_byte(), EOS);
        assert!(reader.peek_line_bytes().is_none());
    }
}
1710
1711// }}} Tests