welly_parser/
buffer.rs

1use std::rc::{Rc};
2
3use super::{bracket, stmt, parsers, EndOfFile, Location, Token, Stream, Characters, Parse};
4
5/// Pipes `source` (which should produce [`char`]s) through:
6/// - a lexer,
7/// - `word_parser`,
8/// - two bracket matchers,
9/// - an [`Expr`] parser, and
10/// - (if the tightest brackets are not round) a [`Stmt`] parser.
11fn make_parser<'a>(source: impl 'a + Stream, word_parser: &'a parsers::Word) -> impl 'a + Stream {
12    let stream = parsers::LEXER.parse_stream(source);
13    let stream = word_parser.parse_stream(stream);
14    parsers::brace(stream)
15}
16
17/// Read [`Token`]s from `stream` until finding one that heuristically might be
18/// the end of a `Stmt`, and return its [`Location`] if successful.
19///
20/// Use this only to recover after an error, because it discards source code.
21fn skip(stream: &mut impl Stream) -> Option<Location> {
22    loop {
23        let token = stream.read();
24        if token.is_incomplete() || token.is::<EndOfFile>() { return None; }
25        if token == ';' { return Some(token.location()); }
26        if token.is::<bracket::Brace>() { return Some(token.location()); }
27        if token.is::<stmt::Stmt>() {
28            // Oops! We read too far. Oh well, discard it.
29            return Some(token.location());
30        }
31    }
32}
33
34// ----------------------------------------------------------------------------
35
36/// A growable source file that can be parsed incrementally.
37///
38/// ```
39/// let mut buffer = welly_parser::Buffer::default();
40/// buffer.push_str("hw = \"Hello, world!\\n\";\n");
41/// buffer.push_str("for _ in 10 { print(hw); }");
42/// while let Some(token) = buffer.try_parse() {
43///     println!("{:#?}", token);
44/// }
45/// ```
46#[derive(Debug)]
47pub struct Buffer {
48    /// A cache of [`parsers::word()`].
49    word_parser: parsers::Word,
50
51    /// Source code that has been received but not yet parsed.
52    source: String,
53
54    /// `true` if all source code has been received.
55    is_complete: bool,
56}
57
58impl Default for Buffer {
59    fn default() -> Self {
60        Self {word_parser: parsers::word(), source: String::new(), is_complete: false}
61    }
62}
63
64impl Buffer {
65    /// Returns the suffix of the source code that has not yet been parsed.
66    pub fn remainder(&self) -> &str { &self.source }
67
68    /// Discard `remainder()`.
69    pub fn clear(&mut self) { self.source.clear(); }
70
71    /// Append `source` to the source code. Requires `!self.is_complete()`.
72    pub fn push_str(&mut self, source: &str) {
73        assert!(!self.is_complete());
74        self.source.push_str(source);
75    }
76
77    /// Inform `self` that it has all the source code.
78    ///
79    /// This can be important, as in the following example:
80    /// ```
81    /// let mut buffer = welly_parser::Buffer::default();
82    /// buffer.push_str("if c {}"); // Could be followed by `else`.
83    /// assert!(buffer.try_parse().is_none());
84    /// buffer.complete(); // Exclude `else`.
85    /// assert!(buffer.try_parse().is_some());
86    /// ```
87    pub fn complete(&mut self) { self.is_complete = true; }
88
89    /// Returns `true` if more source code can be added with `self.push_str()`.
90    pub fn is_complete(&self) -> bool { self.is_complete }
91
92    /// Attempt to parse [`self.remainder()`]. Hopefully the returned [`Token`]
93    /// is a [`Stmt`]. Other possibilities can be found in [`welly`].
94    ///
95    /// `Some((source, token))` indicates that there was enough source code to
96    /// parse a `Token` (which might be an error message). [`Location`]s are
97    /// relative to the returned `source`, which is removed from `self`.
98    ///
99    /// `None` indicates that there was not enough source code, either because
100    /// we need to wait for more, or because there is no more. In this case
101    /// `self` is not modified.
102    ///
103    /// [`Stmt`]: smtm::Stmt
104    // TODO: Return an `ast::Stmt`.
105    // TODO: Returns as many `Token`s as possible using a single parser, so
106    //       that skipping after an error works correctly.
107    pub fn try_parse(&mut self) -> Option<(Rc<str>, Token)> {
108        let (token, end) = {
109            let source = Characters::new(self.remainder(), self.is_complete);
110            let mut stream = make_parser(source, &self.word_parser);
111            let token = stream.read();
112            if token.is_incomplete() || token.is::<EndOfFile>() { return None; }
113            // Split off some source code including at least `token.0`.
114            let mut end = token.location().end;
115            if token.result_ref().is_err() { if let Some(loc) = skip(&mut stream) { end = loc.end; } }
116            (token, end)
117        };
118        let s: String = self.source.drain(..std::cmp::min(end, self.source.len())).collect();
119        Some((s.into(), token))
120    }
121}
122
123// ----------------------------------------------------------------------------
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds `source` to a fresh [`Buffer`] (marking it complete when
    /// `is_complete` is set) and calls `try_parse()` until it returns `None`.
    ///
    /// Asserts that the source text spanned by each returned token equals
    /// `expected`, and that the unparsed text left in the buffer equals
    /// `expected_remainder`.
    fn check(
        source: &'static str,
        is_complete: bool,
        expected: impl Into<Vec<&'static str>>,
        expected_remainder: &'static str,
    ) {
        let mut buffer = Buffer::default();
        buffer.push_str(source);
        if is_complete {
            buffer.complete();
        }
        let spans: Vec<String> = std::iter::from_fn(|| buffer.try_parse())
            .map(|(text, token)| {
                // Token locations are relative to the text returned with them.
                let loc = token.location();
                text[loc.start..loc.end].to_owned()
            })
            .collect();
        let expected: Vec<&'static str> = expected.into();
        assert_eq!(spans, expected);
        assert_eq!(buffer.remainder(), expected_remainder);
    }

    #[test]
    fn whitespace() {
        check(" ", true, [], " ");
    }

    #[test]
    fn semicolon() {
        check(" ; ", true, [";"], " ");
    }

    #[test]
    fn five() {
        check(" 5; ", true, ["5;"], " ");
    }

    #[test]
    fn if_() {
        check("if b {}", true, ["if b {}"], "");
        check("if b {}", false, [], "if b {}");
        check("if b {};", false, ["if b {}", ";"], "");
    }

    #[test]
    fn if_else() {
        check("if b {} else {}", true, ["if b {} else {}"], "");
        check("if b {} else {}", false, ["if b {} else {}"], "");
        check("if b {} else {};", false, ["if b {} else {}", ";"], "");
    }

    #[test]
    fn fn_() {
        check("fn f() {}\nx; y", true, ["fn f() {}\nx;", "y"], "");
        check("fn f() {}\nx; y", false, ["fn f() {}\nx;"], " y");
        check("fn f() {};\nx; y", false, ["fn f() {};", "x;"], " y");
    }
}
181        check("fn f() {};\nx; y", false, ["fn f() {};", "x;"], " y");
182    }
183}