welly_parser/
stream.rs

1use std::{fmt};
2use std::ops::{Range};
3use std::str::{Chars};
4
5use super::{Tree, EndOfFile};
6
/// A contiguous span of source code, measured in bytes, in a form that can
/// be reported to the user.
///
/// The span covers the bytes from `start` (inclusive) to `end` (exclusive).
/// The derived ordering compares `start` first, then `end`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Location {
    /// Byte index of the first byte of this `Location`.
    pub start: usize,
    /// Byte index immediately after the last byte of this `Location`.
    pub end: usize,
}
17
18impl Location {
19    /// A dummy value that can be used for things like [`EndOfFile`].
20    pub const EVERYWHERE: Location = Location {start: usize::MIN, end: usize::MAX};
21}
22
23impl fmt::Debug for Location {
24    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
25        write!(f, "{}..{}", self.start, self.end)
26    }
27}
28
29impl From<Range<usize>> for Location {
30    fn from(value: Range<usize>) -> Self { Self {start: value.start, end: value.end} }
31}
32
33// ----------------------------------------------------------------------------
34
35/// Represents a `T` with a [`Location`].
36///
37/// This is commonly used to represent bits of a parse tree, remembering where
38/// they came from in the source code.
39#[derive(Copy, Clone)]
40pub struct Loc<T>(pub T, pub Location);
41
42impl<T> Loc<T> {
43    /// Convert an `&Loc<T>` to a `Loc<&T>`.
44    pub fn as_ref(&self) -> Loc<&T> { Loc(&self.0, self.1) }
45}
46
47impl<T: fmt::Debug> fmt::Debug for Loc<T> {
48    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
49        self.0.fmt(f)?;
50        write!(f, " ({:?})", self.1)
51    }
52}
53
54impl<U, T: PartialEq<U>> PartialEq<U> for Loc<T> {
55    fn eq(&self, other: &U) -> bool { self.0 == *other }
56}
57
58// ----------------------------------------------------------------------------
59
60/// Represents a parse [`Tree`] or a parse error, with a [`Location`].
61///
62/// - Token(Loc(Ok(t), location)) represents a parse-tree `t`.
63///   `t` can be [`EndOfFile`] to represent the end of the source code.
64///   In this case, the `Location` is spurious.
65/// - Token(Loc(Err(e), location)) represents an error message `e`.
66///   `e` can be the empty string to mark the end of incomplete source code.
67///   In this case, the `Location` is spurious.
68#[derive(Debug)]
69pub struct Token(pub Loc<Result<Box<dyn Tree>, String>>);
70
71impl Token {
72    /// Constructs a `Self` from a `Tree` and its `Location`.
73    pub fn new(tree: Box<dyn Tree>, location: impl Into<Location>) -> Self {
74        Token(Loc(Ok(tree), location.into()))
75    }
76
77    /// Constructs a `Self` from an error message and its `Location`.
78    pub fn new_err(message: impl Into<String>, location: impl Into<Location>) -> Self {
79        Token(Loc(Err(message.into()), location.into()))
80    }
81
82    /// Returns an `EndOfFile`, to indicate the end of the source code.
83    pub fn end_of_file() -> Self { Self::new(Box::new(EndOfFile), Location::EVERYWHERE) }
84
85    /// Returns an empty error message, to indicate incomplete source code.
86    pub fn incomplete() -> Self { Self::new_err("", Location::EVERYWHERE) }
87
88    /// Returns the [`Location`] of `self`.
89    pub fn location(&self) -> Location { self.0.1 }
90
91    /// Throws away the `location`.
92    pub fn result(self) -> Result<Box<dyn Tree>, String> { self.0.0 }
93
94    /// Throws away the `location`.
95    pub fn result_ref(&self) -> &Result<Box<dyn Tree>, String> { &self.0.0 }
96
97    /// Tests whether `self` is a `T`.
98    pub fn is<T: Tree>(&self) -> bool {
99        if let Ok(t) = self.result_ref() { t.is::<T>() } else { false }
100    }
101
102    /// Tests whether `self` marks the end of incomplete source code.
103    pub fn is_incomplete(&self) -> bool {
104        if let Err(e) = self.result_ref() { e.len() == 0 } else { false }
105    }
106
107    /// Discard the [`Location`], panic on `Err`, and panic if the payload is
108    /// not of type `T`.
109    ///
110    /// This is useful in test code.
111    pub fn unwrap<T: Tree>(self) -> T {
112        *self.result().unwrap().downcast::<T>().unwrap()
113    }
114
115    /// Discard the [`Location`], panic on `Ok`, return the error message.
116    ///
117    /// This is useful in test code.
118    pub fn unwrap_err(self) -> String {
119        self.result().unwrap_err()
120    }
121}
122
123impl<T: Tree + PartialEq> std::cmp::PartialEq<T> for Token {
124    fn eq(&self, other: &T) -> bool {
125        if let Ok(t) = self.result_ref() { **t == *other } else { false }
126    }
127}
128
129// ----------------------------------------------------------------------------
130
131/// Yields [`Token`]s.
132///
133/// Differences from an [`Iterator`]:
134/// - The item type is always [`Token`].
135/// - `read()` always returns an item.
136pub trait Stream {
137    /// Read a single `Token`.
138    fn read(&mut self) -> Token;
139
140    /// Read and return all `Token`s upto [`EndOfFile`], which is discarded.
141    fn read_all(mut self) -> Vec<Token> where Self: Sized {
142        let mut ret = Vec::new();
143        let mut token = self.read();
144        while token != EndOfFile {
145            ret.push(token);
146            token = self.read();
147        }
148        ret
149    }
150}
151
152impl<I: Iterator<Item=Token>> Stream for I {
153    fn read(&mut self) -> Token {
154        self.next().unwrap_or_else(|| Token::end_of_file())
155    }
156}
157
158// ----------------------------------------------------------------------------
159
/// A [`Stream`] over the `char`s of a [`str`].
///
/// Each [`Token`] is one `char`, and its [`Location`] is a byte range
/// relative to the start of the `str`.
///
/// After the last `char`, the `Stream` yields [`Token::end_of_file()`] if
/// `is_complete`, otherwise [`Token::incomplete()`].
pub struct Characters<'a> {
    /// The part of the source code that has not been read yet.
    chars: Chars<'a>,

    /// The total length of the source code, in bytes.
    length: usize,

    /// Selects the terminator: `true` for `Token::end_of_file()`, `false`
    /// for `Token::incomplete()`.
    is_complete: bool,
}
176
177impl<'a> Characters<'a> {
178    /// Iterate through `source`.
179    ///
180    /// - is_complete - Determines the `Token` appended to the end of `source`.
181    ///   `true` gives `Token::end_of_file()`, otherwise `Token::incomplete()`.
182    pub fn new(source: &'a str, is_complete: bool) -> Self {
183        Self {chars: source.chars(), length: source.len(), is_complete}
184    }
185
186    /// Returns the current byte index in the `str`.
187    pub fn index(&self) -> usize { self.length - self.chars.as_str().len() }
188}
189
190impl<'a> Stream for Characters<'a> {
191    fn read(&mut self) -> Token {
192        let start = self.index();
193        if let Some(c) = self.chars.next() {
194            let end = self.index();
195            Token::new(Box::new(c), start..end)
196        } else if self.is_complete { Token::end_of_file() } else { Token::incomplete() }
197    }
198}