// lexer_rs/lexer/traits.rs

//a Imports

//a LexerError
//tt LexerError
/// A trait required of an error within a Lexer - a char that does not
/// match any token parser must return an error, and this trait
/// requires that such an error be provided
///
/// The type parameter `P` is the type of the stream state (position)
/// at which the failure is reported.
///
/// It might be nice to have this take the [Lexer] too, but then there
/// is a cycle in that Lexer::Error will in general depend on Lexer
/// which depends on Lexer::Error... This breaks code (and the compiler
/// tends to hang forever)
pub trait LexerError<P>: Sized + std::error::Error {
    /// Return an error indicating that a bad character (one that
    /// could not be matched for a token) has occurred at the position
    /// indicated by the state
    ///
    /// * `state` - the stream state at which the character was found
    /// * `ch` - the character that could not be matched
    fn failed_to_parse(state: P, ch: char) -> Self;
}

20//a Lexer
21//tt Lexer
22/// The [Lexer] trait is provided by stream types that support parsing
23/// into tokens.
24///
25/// The trait itself requires:
26///
27/// * a token type that the [Lexer] will produce
28///
29/// * a stream state (often just a byte offset) that can be tracked
30///   during parsing
31///
32/// * an error type that suports [LexerError] so that the lexer can
33///   generate a failure should a token parse fail
34///
35/// The [Lexer] will parse its stream provided to it by matching data in
36/// the stream to tokens using parser functions. Such functions are
37/// invoked with a reference to the stream being parsed, the stream
38/// state, and the next character in the stream (the one pointed to by
39/// the stream state).
40///
41/// The signature is:
42///
43/// ```ignore
44///    fn parse(stream: &LexerOfStr<P, T, E>, pos:P, ch:char) ->
45///               LexerParseResult<P, T, E>
46/// ```
47///
48/// where
49///
50/// ```ignore
51///    LexerParseResult<P, T, E> = Result<Option<P, T>, E>
52/// ```
53///
54/// Parsing functions examine the character they are given, and
55/// possibly more characters by accessing the stream using the provide
56/// state. If they match, they return an Ok result with the token they
57/// have parsed to, *and* an updated state which is *beyond* the
58/// matched token.
59///
60/// If the parser function mismatches then it returns an Ok result of None
61///
62/// If the parser function hits a fatal error (for example, a stream
63/// indicates a network disconnection) then it must return an Err with
64/// the appropriate error (of its provided Error type).
65///
66/// Parser functions are provided to the [Lexer] as an array of Box dyn
67/// functions, such as:
68///
69/// ```ignore
70///       let parsers = [
71///            Box::new(parse_char_fn) as BoxDynLexerParseFn<OurLexer>
72///            Box::new(parse_value_fn),
73///            Box::new(parse_whitespace_fn),
74///        ];
75/// ```
76///
77/// Note that the use of 'as Box...' is required, as without it type
78/// inference will kick in on the Box::new() to infer parse_char_fn as
79/// a precise type, whereas the more generic dyn Fn is what is required.
80///
81/// This trait is provided in part to group the types for a lexical
82/// parser together, enabling simpler type inference and less
83/// turbofish syntax in clients of the lexical analysis.
84pub trait Lexer: std::fmt::Debug {
85    /// The Token type is the type of the token to be returned by the
86    /// Lexer; it is used as part of the result of the [Lexer] parse
87    /// functions.
88    type Token: Sized + std::fmt::Debug;
89
90    /// The State of the stream that is used and returned by the parse
91    /// functions; it must be copy as it is replicated constantly
92    /// throughout the parsing process.
93    ///
94    /// This can be a [crate::StreamCharPos]
95    type State: Sized + Copy + std::fmt::Debug + Default;
96
97    /// The error type returned by the parser functions in the lexical analyzer
98    type Error: LexerError<Self::State>;
99
100    /// This attempts to parse the next token found at the state of
101    /// the [Lexer] stream, by applying the parsers in order.
102    ///
103    /// An error is returned if the token cannot be parsed
104    fn parse<'a>(
105        &'a self,
106        state: Self::State,
107        parsers: &[BoxDynLexerParseFn<'a, Self>],
108    ) -> LexerParseResult<Self::State, Self::Token, Self::Error>;
109
110    /// This creates an iterator over all of the tokens in the [Lexer]
111    /// stream, by applying the parsers in order at the current stream
112    /// position whenever the 'next' method is invoked.
113    ///
114    /// The iterator returns None when the end of stream is reached,
115    /// otherwise it returns a result of the token or an error,
116    /// depending on the success of the parsers.
117    fn iter<'iter>(
118        &'iter self,
119        parsers: &'iter [BoxDynLexerParseFn<'iter, Self>],
120    ) -> Box<dyn Iterator<Item = Result<Self::Token, Self::Error>> + 'iter>;
121}
122
//tp LexerParseResult
/// The return value for a Lexer parse function
///
/// * `Ok(Some((state, token)))` - the parser matched; `state` is the
///   stream state beyond the matched token
///
/// * `Ok(None)` - the parser did not match
///
/// * `Err(error)` - the parser hit a fatal error
///
/// This *could* have been defined as:
///
/// ```ignore
///    pub type LexerParseResult<L:Lexer>
///      = Result<Option<(<L as Lexer>::State, <L as Lexer>::Token)>, <L as Lexer>::Error>;
/// ```
///
/// But then clients that have their type L with a lifetime (which is
/// common) would have a parse result that must be indicated by a
/// lifetime, where the actual result *does not* need one.
///
/// This causes problems for clients
pub type LexerParseResult<S, T, E> = Result<Option<(S, T)>, E>;

137//tp LexerParseFn
138/// The type of a parse function
139pub type LexerParseFn<L> =
140    fn(
141        lexer: &L,
142        <L as Lexer>::State,
143        char,
144    ) -> LexerParseResult<<L as Lexer>::State, <L as Lexer>::Token, <L as Lexer>::Error>;
145
146//tp BoxDynLexerParseFn
147/// The type of a parse function, when Boxed as a dyn trait
148///
149/// This type can be used in arrays/slices to allow a Lexer to run
150/// through a list of possible token parsers such as:
151///
152/// ```ignore
153///       let parsers = [
154///            Box::new(parse_char_fn) as BoxDynLexerParseFn<OurLexer>
155///            Box::new(parse_value_fn),
156///            Box::new(parse_whitespace_fn),
157///        ];
158/// ```
159///
160/// Note that the use of 'as Box...' is required, as without it type
161/// inference will kick in on the Box::new() to infer parse_char_fn as
162/// a precise type, whereas the more generic dyn Fn is what is required.
163pub type BoxDynLexerParseFn<'a, L> = Box<
164    dyn for<'call> Fn(
165            &'call L,
166            <L as Lexer>::State,
167            char,
168        ) -> LexerParseResult<
169            <L as Lexer>::State,
170            <L as Lexer>::Token,
171            <L as Lexer>::Error,
172        > + 'a,
173>;