libreda_stream_parser/lib.rs

#![deny(missing_docs)]

//! Stream parser utilities: a [`Lexer`] trait that splits an input
//! stream into tokens, and a [`Tokenized`] adapter that exposes the
//! tokens through convenience methods for writing simple parsers.

use std::error::Error;
use std::fmt;
use std::iter::Peekable;
use std::num::ParseIntError;
use std::str::FromStr;

use itertools::PeekingNext;

/// Splits an input stream into tokens.
pub trait Lexer {
    /// Type of the atomic input units. For text input this is typically `char`.
    type Char;

    /// Consume characters from `input` until one full token has been read,
    /// passing each character of the token to the `output` callback.
    /// Emitting no characters signals the end of the input.
    fn consume_next_token(
        &mut self,
        input: &mut (impl Iterator<Item = Self::Char> + PeekingNext),
        output: impl FnMut(Self::Char),
    ) -> Result<(), ParserError<Self::Char>>;
}

/// Iterator over tokens, driven by a [`Lexer`] which splits the
/// underlying input stream.
pub struct Tokenized<I, L>
where
    I: Iterator,
{
    /// The underlying input stream.
    iter: I,
    /// The lexer used to split the input into tokens.
    lexer: L,
    /// Whether `current_token` holds a valid token.
    has_current: bool,
    /// Buffer holding the most recently read token.
    current_token: Option<Vec<I::Item>>,
}

impl<I, L> Iterator for Tokenized<I, L>
where
    I: Iterator + PeekingNext,
    L: Lexer<Char = I::Item>,
    I::Item: Eq + Copy + 'static,
{
    type Item = Vec<I::Item>;

    fn next(&mut self) -> Option<Self::Item> {
        self.next_ref().map(|e| e.to_vec())
    }
}

impl<I, L> Tokenized<I, L>
where
    I: Iterator + PeekingNext,
    L: Lexer<Char = I::Item>,
    I::Item: Eq + Copy + 'static,
{
    /// Advance to the next token and return a reference to it.
    /// Returns `None` at the end of the input or on a lexer error.
    pub fn next_ref(&mut self) -> Option<&[I::Item]> {
        self.advance().ok().and_then(|_| self.current_token_ref())
    }

    /// Return the current token and advance to the next one.
    /// Fails with [`ParserError::UnexpectedEndOfFile`] if there is no
    /// current token.
    pub fn take(&mut self) -> Result<Vec<I::Item>, ParserError<I::Item>> {
        let s = self.current_token();
        self.advance()?;
        if let Some(s) = s {
            Ok(s)
        } else {
            Err(ParserError::UnexpectedEndOfFile)
        }
    }

    /// Read the next token from the input into the internal buffer.
    /// Once the end of the input is reached, the current token becomes `None`.
    pub fn advance(&mut self) -> Result<(), ParserError<I::Item>> {
        // Reuse the allocation of the previous token buffer.
        let mut buffer = self.current_token.take().unwrap_or_default();
        buffer.clear();

        self.lexer
            .consume_next_token(&mut self.iter, |c| buffer.push(c))?;

        // An empty buffer signals the end of the input.
        let has_next = !buffer.is_empty();

        if has_next {
            self.current_token = Some(buffer);
        }

        self.has_current = has_next;
        Ok(())
    }

    /// Get the current token without consuming it.
    pub fn current_token_ref(&self) -> Option<&[I::Item]> {
        if self.has_current {
            self.current_token.as_deref()
        } else {
            None
        }
    }

    /// Get a copy of the current token without consuming it.
    pub fn current_token(&self) -> Option<Vec<I::Item>> {
        self.current_token_ref().map(|s| s.to_vec())
    }

    /// Check that the current token equals `s` and consume it.
    /// Fails with [`ParserError::UnexpectedToken`] if the current token
    /// differs from `s`.
    pub fn expect(
        &mut self,
        s: impl IntoIterator<Item = I::Item> + Clone,
    ) -> Result<(), ParserError<I::Item>> {
        match &self.current_token {
            None => Err(ParserError::UnexpectedEndOfFile),
            Some(token) => {
                if token.iter().copied().eq(s.clone()) {
                    self.advance()?;
                    Ok(())
                } else {
                    Err(ParserError::UnexpectedToken(
                        s.into_iter().collect(),
                        self.current_token().unwrap(),
                    ))
                }
            }
        }
    }

    /// If the current token equals `s`, consume it and return `true`.
    /// Otherwise leave it in place and return `false`.
    pub fn test(&mut self, s: &[I::Item]) -> Result<bool, ParserError<I::Item>> {
        let result = self.peeking_test(s)?;
        if result {
            self.advance()?;
        }
        Ok(result)
    }

    /// Check whether the current token equals `s` without consuming it.
    pub fn peeking_test(&mut self, s: &[I::Item]) -> Result<bool, ParserError<I::Item>> {
        if self.current_token.is_none() {
            return Err(ParserError::UnexpectedEndOfFile);
        }

        Ok(self.current_token_ref() == Some(s))
    }

    /// Skip tokens until `s` is found; `s` itself is consumed as well.
    pub fn skip_until(&mut self, s: &[I::Item]) -> Result<(), ParserError<I::Item>> {
        while !self.test(s)? {
            self.advance()?;
        }
        Ok(())
    }
}
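
// An illustrative sketch of the generic (non-`char`) API above, assuming a
// hypothetical `ByteLexer` over `u8` tokens. Since `tokenize` below is
// restricted to `char` iterators, the `Tokenized` struct is constructed
// directly, which is possible within this module.
#[test]
fn test_tokenize_bytes() {
    use itertools::Itertools;

    struct ByteLexer {}

    impl Lexer for ByteLexer {
        type Char = u8;

        fn consume_next_token(
            &mut self,
            input: &mut (impl Iterator<Item = Self::Char> + PeekingNext),
            output: impl FnMut(Self::Char),
        ) -> Result<(), ParserError<u8>> {
            // Discard spaces, then emit the next run of non-space bytes.
            input.peeking_take_while(|b| *b == b' ').for_each(drop);
            input.peeking_take_while(|b| *b != b' ').for_each(output);
            Ok(())
        }
    }

    let mut tk = Tokenized {
        iter: b"ab cd".iter().copied().peekable(),
        lexer: ByteLexer {},
        has_current: false,
        current_token: None,
    };

    tk.advance().unwrap();
    assert!(tk.peeking_test(b"ab").unwrap());
    tk.expect(b"ab".iter().copied()).unwrap();
    assert_eq!(tk.take().unwrap(), b"cd".to_vec());
}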

/// Convenience methods for token streams over `char` input.
impl<I, L> Tokenized<I, L>
where
    I: Iterator<Item = char> + PeekingNext,
    L: Lexer<Char = I::Item>,
{
    /// Get the current token as a `String` without consuming it.
    pub fn current_token_str(&self) -> Option<String> {
        self.current_token_ref().map(|s| s.iter().collect())
    }

    /// Return the current token as a `String` and advance to the next one.
    pub fn take_str(&mut self) -> Result<String, ParserError<I::Item>> {
        let s = self.current_token_str();
        self.advance()?;
        if let Some(s) = s {
            Ok(s)
        } else {
            Err(ParserError::UnexpectedEndOfFile)
        }
    }

    /// Consume the current token and parse it with [`FromStr`].
    /// Fails with [`ParserError::InvalidLiteral`] if the token cannot
    /// be parsed into `F`.
    pub fn take_and_parse<F: FromStr>(&mut self) -> Result<F, ParserError<I::Item>> {
        let result = if let Some(token) = self.current_token_ref() {
            let string: String = token.iter().collect();

            if let Ok(parsed) = string.parse::<F>() {
                Ok(parsed)
            } else {
                Err(ParserError::InvalidLiteral(token.to_vec()))
            }
        } else {
            Err(ParserError::UnexpectedEndOfFile)
        };

        // Advance even if parsing failed.
        self.advance()?;

        result
    }

    /// Check that the current token equals `s` and consume it.
    /// Fails with [`ParserError::UnexpectedToken`] if the current token
    /// differs from `s`.
    pub fn expect_str(&mut self, s: &str) -> Result<(), ParserError<I::Item>> {
        match &self.current_token {
            None => Err(ParserError::UnexpectedEndOfFile),
            Some(token) => {
                if token.iter().copied().eq(s.chars()) {
                    self.advance()?;
                    Ok(())
                } else {
                    Err(ParserError::UnexpectedToken(
                        s.chars().collect(),
                        self.current_token().unwrap(),
                    ))
                }
            }
        }
    }

    /// If the current token equals `s`, consume it and return `true`.
    /// Otherwise leave it in place and return `false`.
    pub fn test_str(&mut self, s: &str) -> Result<bool, ParserError<I::Item>> {
        let result = self.peeking_test_str(s)?;
        if result {
            self.advance()?;
        }
        Ok(result)
    }

    /// Check whether the current token equals `s` without consuming it.
    pub fn peeking_test_str(&mut self, s: &str) -> Result<bool, ParserError<I::Item>> {
        match &self.current_token {
            None => Err(ParserError::UnexpectedEndOfFile),
            Some(token) => Ok(token.iter().copied().eq(s.chars())),
        }
    }

    /// Skip tokens until `s` is found; `s` itself is consumed as well.
    pub fn skip_until_str(&mut self, s: &str) -> Result<(), ParserError<I::Item>> {
        while !self.test_str(s)? {
            self.advance()?;
        }
        Ok(())
    }
}

/// Create a token stream from a character iterator and a lexer.
/// Call [`Tokenized::advance`] once to load the first token.
pub fn tokenize<I, L>(iter: I, lexer: L) -> Tokenized<Peekable<I>, L>
where
    I: Iterator<Item = char>,
{
    Tokenized {
        iter: iter.peekable(),
        lexer,
        has_current: false,
        current_token: None,
    }
}

/// Errors that can happen during tokenization and parsing.
#[derive(Clone, Debug)]
pub enum ParserError<C: 'static> {
    /// The input ended although more tokens were expected.
    UnexpectedEndOfFile,
    /// Found a token other than the expected one: `(expected, actual)`.
    UnexpectedToken(Vec<C>, Vec<C>),
    /// A token could not be parsed as the requested literal type.
    InvalidLiteral(Vec<C>),
    /// An integer could not be parsed.
    ParseIntError(ParseIntError),
}

impl<C: 'static + fmt::Display + fmt::Debug> Error for ParserError<C> {}

impl<C: fmt::Display + fmt::Debug> fmt::Display for ParserError<C> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ParserError::UnexpectedEndOfFile => write!(f, "Unexpected end of file."),
            ParserError::UnexpectedToken(expected, actual) => {
                write!(f, "Unexpected token: '{actual:?}' instead of '{expected:?}'.")
            }
            ParserError::InvalidLiteral(n) => write!(f, "Invalid literal: '{n:?}'."),
            ParserError::ParseIntError(e) => write!(f, "Illegal integer: '{e:?}'"),
        }
    }
}

impl<C> From<ParseIntError> for ParserError<C> {
    fn from(e: ParseIntError) -> Self {
        Self::ParseIntError(e)
    }
}

#[test]
fn test_tokenize_simple() {
    use itertools::Itertools;

    struct MyLexer {}

    impl Lexer for MyLexer {
        type Char = char;

        fn consume_next_token(
            &mut self,
            input: &mut (impl Iterator<Item = Self::Char> + PeekingNext),
            mut output: impl FnMut(Self::Char),
        ) -> Result<(), ParserError<char>> {
            if let Some(c) = input.next() {
                output(c);
                // Extend the token with all following characters of the
                // same class: all whitespace or all non-whitespace.
                let take_whitespace = c.is_whitespace();

                input
                    .peeking_take_while(|c| c.is_whitespace() == take_whitespace)
                    .for_each(output);
            }

            Ok(())
        }
    }

    let data = "here \n are \t some words ";

    let mut tk = tokenize(data.chars(), MyLexer {});

    // Load the first token.
    tk.advance().unwrap();
    tk.expect_str("here").unwrap();
    // `next()` steps over the whitespace token in between.
    tk.next();
    tk.expect_str("are").unwrap();
    tk.next();
    tk.expect_str("some").unwrap();
    tk.next();
    tk.expect_str("words").unwrap();
}
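
// An illustrative sketch of `take_and_parse` and `skip_until_str`, assuming
// a hypothetical `SplitLexer` (defined only here) that splits the input at
// whitespace and discards it; the token stream is made up for this example.
#[test]
fn test_take_and_parse_and_skip() {
    use itertools::Itertools;

    struct SplitLexer {}

    impl Lexer for SplitLexer {
        type Char = char;

        fn consume_next_token(
            &mut self,
            input: &mut (impl Iterator<Item = Self::Char> + PeekingNext),
            output: impl FnMut(Self::Char),
        ) -> Result<(), ParserError<char>> {
            // Discard whitespace, then emit the next word as one token.
            input.peeking_take_while(|c| c.is_whitespace()).for_each(drop);
            input
                .peeking_take_while(|c| !c.is_whitespace())
                .for_each(output);
            Ok(())
        }
    }

    let mut tk = tokenize("header 42 3.5 ; begin body".chars(), SplitLexer {});

    // Load the first token.
    tk.advance().unwrap();
    tk.expect_str("header").unwrap();

    // Parse tokens into any `FromStr` type.
    let n: u32 = tk.take_and_parse().unwrap();
    assert_eq!(n, 42);
    let x: f64 = tk.take_and_parse().unwrap();
    assert_eq!(x, 3.5);

    // Skip everything up to and including the "begin" keyword.
    tk.skip_until_str("begin").unwrap();
    tk.expect_str("body").unwrap();
}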