lexer_rs/
char_stream.rs

1//a Imports
2use crate::{PosnInCharStream, StreamCharSpan};
3
4//tt CharStream
5/// The [CharStream] trait allows a stream of [char] to provide extraa methods
6///
7/// Requires P : PosnInCharStream
8pub trait CharStream<P>
9where
10    P: PosnInCharStream,
11{
12    /// Steps along the stream starting at the provided state (and
13    /// character) while the provided function returns true; the
14    /// function is provided with the index and character (starting at
15    /// 0 / ch), and it returns true if the token continues, otherwise
16    /// false
17    ///
18    /// If the first invocation of 'f' returns false then the token is
19    /// said to not match, and 'do_while' returns the stream state and Ok(None).
20    ///
21    /// If the first N (more than zero) invocations match then the
22    /// result is the stream state after the matched characters, and
23    /// Some(initial state, N)
24    ///
25    /// This can be used to match whitespace (where N is probably
26    /// discarded), or user 'id' values in a language. The text can be
27    /// retrieved with the 'get_text' method
28    fn do_while<F: Fn(usize, char) -> bool>(
29        &self,
30        state: P,
31        ch: char,
32        f: &F,
33    ) -> (P, Option<(P, usize)>) {
34        if !f(0, ch) {
35            return (state, None);
36        }
37        let start = state;
38        let mut n = 1;
39        let mut state = self.consumed_char(state, ch);
40        // # Safety
41        //
42        // 'ofs' is maintained as a utf8 character point boundary
43        // within or at the end of the 'str' borrowed by [Self]
44        while let Some(ch) = self.peek_at(&state) {
45            if !f(n, ch) {
46                break;
47            }
48            n += 1;
49            state = self.consumed_char(state, ch);
50        }
51        (state, Some((start, n)))
52    }
53
54    /// Steps along the stream starting at the provided state,
55    /// character and accumulator value while the provided function
56    /// returns (true, new accumulator); the function is provided with
57    /// the latest accumulator, index, character (starting at 0 / ch),
58    /// and it returns true and a new accumulator if the token
59    /// continues, otherwise false and the final accumulator value
60    ///
61    /// If the first invocation of 'f' returns false then the token is
62    /// said to not match, and 'fold' returns the stream state and Ok(None).
63    ///
64    /// If the first N (more than zero) invocations match then the
65    /// result is the stream state after the matched characters, and
66    /// Some(initial state, N, final accumulator)
67    ///
68    /// This can be used to accumulate significant state about a token
69    /// as it is parsed, in excess of the simple number of characters.
70    fn fold<T, F: Fn(&Self, T, &P, usize, char) -> (T, Option<P>)>(
71        &self,
72        state: P,
73        ch: char,
74        acc: T,
75        f: &F,
76    ) -> (P, Option<(P, usize, T)>) {
77        let (mut acc, some_posn) = f(self, acc, &state, 0, ch);
78        if some_posn.is_none() {
79            return (state, None);
80        }
81        let start = state;
82        let mut n = 1;
83        let mut state = some_posn.unwrap();
84        while let Some(ch) = self.peek_at(&state) {
85            let (new_acc, more_posn) = f(self, acc, &state, n, ch);
86            acc = new_acc;
87            if more_posn.is_none() {
88                break;
89            }
90            n += 1;
91            state = more_posn.unwrap();
92        }
93        (state, Some((start, n, acc)))
94    }
95
96    /// Retrieve a range of bytes from the stream
97    fn range_as_bytes(&self, ofs: usize, n: usize) -> &[u8];
98
99    /// Return true if the content of the stream at 'state' matches
100    /// the byte slice
101    fn matches_bytes(&self, state: &P, s: &[u8]) -> bool;
102
103    /// Get the text between the start of a span (inclusive) and the
104    /// end of the span (exclusive).
105    fn get_text_span(&self, span: &StreamCharSpan<P>) -> &str
106    where
107        P: PosnInCharStream;
108
109    /// Get the text between the start (inclusive) and the
110    /// end (exclusive).
111    fn get_text(&self, start: P, end: P) -> &str;
112
113    // Return true if the text at 'pos' matches the string
114    //
115    // Waiting for pattern stabiliztion
116    // fn matches<'call, P:std::str::pattern::Pattern<'call>>(&self, pos: &P, pat: P) -> bool;
117
118    /// Match the text at the offset with a str; return true if it matches, else false
119    fn matches_str(&self, pos: &P, pat: &str) -> bool;
120
121    /// Peek at the next character in the stream, returning None if
122    /// the state is the end of the stream
123    fn peek_at(&self, state: &P) -> Option<char>;
124
125    //cp consumed
126    /// Move the stream state forward by the specified number of characters
127    ///
128    /// The characters MUST NOT inclulde newlines
129    fn consumed(&self, state: P, num_chars: usize) -> P;
130
131    //cp consumed_char
132    /// Get a stream state after consuming the specified character at
133    /// its current state
134    fn consumed_char(&self, state: P, ch: char) -> P
135    where
136        P: PosnInCharStream,
137    {
138        if ch == '\n' {
139            state.advance_line(1)
140        } else {
141            state.advance_cols(ch.len_utf8(), 1)
142        }
143    }
144
145    //cp consumed_newline
146    /// Get a stream state after consuming a newline at its current state
147    ///
148    /// # Safety
149    ///
150    /// num_bytes *must* correspond to the number of bytes that the
151    /// newline character consists of, and state *must* point to the
152    /// bytes offset of that character
153    unsafe fn consumed_newline(&self, state: P, num_bytes: usize) -> P
154    where
155        P: PosnInCharStream,
156    {
157        state.advance_line(num_bytes)
158    }
159
160    //cp consumed_ascii_str
161    /// Get the state after consuming a particular ascii string
162    /// without newlines
163    ///
164    /// This is safe as there is no unsafe handling of byte offsets
165    /// within *state*; however, there is no check that the provided
166    /// string is ASCII and that it does not contain newlines. If
167    /// these API rules are broke then the lie and column held by
168    /// *state* may be incorrect (which is not *unsafe*, but
169    /// potentially a bug)
170    fn consumed_ascii_str(&self, state: P, s: &str) -> P
171    where
172        P: PosnInCharStream,
173    {
174        let n = s.len();
175        state.advance_cols(n, n)
176    }
177
178    //cp consumed_chars
179    /// Become the span after consuming a particular string of known character length
180    ///
181    /// # Safety
182    ///
183    /// num_bytes *must* correspond to the number of bytes that
184    /// 'num_chars' indicates start at *state*. If this constraint is
185    /// not met then the byte offset indicated by the returned value
186    /// may not correspond to a UTF8 character boundary within the
187    /// stream.
188    unsafe fn consumed_chars(&self, state: P, num_bytes: usize, num_chars: usize) -> P
189    where
190        P: PosnInCharStream,
191    {
192        state.advance_cols(num_bytes, num_chars)
193    }
194
195    //mp commit_consumed
196    /// Invoked by the Lexer to indicate that the stream has been
197    /// consumed up to a certain point, and that (for parsing) no
198    /// state earlier in the stream will be requested in the future
199    ///
200    /// A truly streaming source can drop earlier data in the stream
201    /// if this fits the application
202    fn commit_consumed(&self, _up_to: &P) {}
203}