generic_lexer/
lib.rs

1mod input;
2
3pub use input::BufferedInput;
4
5use std::error::Error;
6use std::fmt;
7
/// A token with a kind (usually an enum representing distinct token types) and its source text.
///
/// `Clone`, `PartialEq` and `Eq` are derived so tokens can be copied and compared
/// (e.g. with `assert_eq!`) whenever the kind type `K` supports the same operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token<K> {
    /// What sort of token this is
    kind: K,
    /// The source text stored for this token
    text: String,
}

impl<K> Token<K> {
    /// Create a new token with the given kind and text
    pub fn new(kind: K, text: String) -> Token<K> {
        Token { kind, text }
    }

    /// Return the token's kind (usually an enum)
    pub fn kind(&self) -> &K {
        &self.kind
    }

    /// Return the token's text
    ///
    /// The return type stays `&String` (rather than `&str`) so existing callers keep
    /// compiling; it dereferences to `&str` wherever one is expected.
    pub fn text(&self) -> &String {
        &self.text
    }

    /// Consume the token and return its text without cloning it
    pub fn into_text(self) -> String {
        self.text
    }
}
36
/// An error produced while trying to match a token
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MatchError {
    /// A character was encountered that no token can start or continue with
    Unexpected(char),
    /// A free-form error message supplied by the matcher
    Custom(String),
}

impl fmt::Display for MatchError {
    /// Write the error message without a trailing newline, so callers decide how
    /// the message is terminated.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            MatchError::Unexpected(c) => write!(f, "Unexpected '{}'", c),
            // Call through the fully-qualified trait path: `String` implements both
            // `Display` and `Debug`, so naming the trait keeps the call unambiguous
            // and independent of which formatting traits are imported.
            MatchError::Custom(msg) => fmt::Display::fmt(msg, f),
        }
    }
}

impl Error for MatchError {}

/// Result of a match attempt: the matched value or a `MatchError`
pub type MatchResult<T> = Result<T, MatchError>;
55
56/// A matcher fn matches a character (and and any following characters) and returns a `T`
57/// to indicate the kind of token (see `Token`)
58///
59/// `input` is always fresh (i.e. its buffer is empty)
60pub trait Matcher<K> {
61    fn try_match(&self, first_char: char, input: &mut BufferedInput) -> MatchResult<K>;
62}
63
64impl<F, K> Matcher<K> for F
65    where F: Fn(char, &mut BufferedInput) -> MatchResult<K> {
66    fn try_match(&self, first_char: char, input: &mut BufferedInput) -> MatchResult<K> {
67        (*self)(first_char, input)
68    }
69}
70
71/// A lexer splits a source string into tokens using the given `MatcherFn`
72pub struct Lexer<'a, K> {
73    input: BufferedInput<'a>,
74    matcher: &'a dyn Matcher<K>,
75    skip_whitespace: bool,
76}
77
78impl<'a, K> Lexer<'a, K> {
79    pub fn new(input: &'a str, matcher: &'a dyn Matcher<K>, skip_whitespace: bool) -> Lexer<'a, K> {
80        Lexer {
81            input: BufferedInput::new(input),
82            matcher,
83            skip_whitespace,
84        }
85    }
86}
87
88impl<'a, K> Iterator  for Lexer<'a, K> {
89    type Item = MatchResult<Token<K>>;
90
91    fn next(&mut self) -> Option<Self::Item> {
92        // skip whitespace
93        if self.skip_whitespace {
94            self.input.skip_whitespace();
95        }
96
97        // get first character
98        let first_char = match self.input.accept() {
99            Some(byte) => byte,
100            None => return None,
101        };
102
103        // match a token kind and mark the end of the token
104        let kind = match self.matcher.try_match(first_char, &mut self.input) {
105            Ok(kind) => kind,
106            Err(err) => return Some(Err(err)),
107        };
108
109        // create a `Token` wrapper and return it
110        Some(Ok(Token::new(kind, self.input.take_buffer())))
111    }
112}
113