// lexer_rs/lexer/traits.rs
//a Imports

//a LexerError
//tt LexerError
/// A trait required of an error within a Lexer - a char that does not
/// match any token parser must return an error, and this trait
/// requires that such an error can be constructed
///
/// The type parameter `P` is the type of the stream state (position)
/// that is handed to the error when it is created.
///
/// It might be nice to have this take the [Lexer] too, but then there
/// is a cycle in that Lexer::Error will in general depend on Lexer
/// which depends on Lexer::Error... This breaks code (and the compiler
/// tends to hang forever)
pub trait LexerError<P>: Sized + std::error::Error {
    /// Return an error indicating that a bad character (one that
    /// could not be matched for a token) has occurred at the position
    /// indicated by the state
    ///
    /// * `state` - the stream state at which the character was found
    ///
    /// * `ch` - the character that no token parser matched
    fn failed_to_parse(state: P, ch: char) -> Self;
}

20//a Lexer
21//tt Lexer
22/// The [Lexer] trait is provided by stream types that support parsing
23/// into tokens.
24///
25/// The trait itself requires:
26///
27/// * a token type that the [Lexer] will produce
28///
29/// * a stream state (often just a byte offset) that can be tracked
30/// during parsing
31///
32/// * an error type that suports [LexerError] so that the lexer can
33/// generate a failure should a token parse fail
34///
35/// The [Lexer] will parse its stream provided to it by matching data in
36/// the stream to tokens using parser functions. Such functions are
37/// invoked with a reference to the stream being parsed, the stream
38/// state, and the next character in the stream (the one pointed to by
39/// the stream state).
40///
41/// The signature is:
42///
43/// ```ignore
44/// fn parse(stream: &LexerOfStr<P, T, E>, pos:P, ch:char) ->
45/// LexerParseResult<P, T, E>
46/// ```
47///
48/// where
49///
50/// ```ignore
51/// LexerParseResult<P, T, E> = Result<Option<P, T>, E>
52/// ```
53///
54/// Parsing functions examine the character they are given, and
55/// possibly more characters by accessing the stream using the provide
56/// state. If they match, they return an Ok result with the token they
57/// have parsed to, *and* an updated state which is *beyond* the
58/// matched token.
59///
60/// If the parser function mismatches then it returns an Ok result of None
61///
62/// If the parser function hits a fatal error (for example, a stream
63/// indicates a network disconnection) then it must return an Err with
64/// the appropriate error (of its provided Error type).
65///
66/// Parser functions are provided to the [Lexer] as an array of Box dyn
67/// functions, such as:
68///
69/// ```ignore
70/// let parsers = [
71/// Box::new(parse_char_fn) as BoxDynLexerParseFn<OurLexer>
72/// Box::new(parse_value_fn),
73/// Box::new(parse_whitespace_fn),
74/// ];
75/// ```
76///
77/// Note that the use of 'as Box...' is required, as without it type
78/// inference will kick in on the Box::new() to infer parse_char_fn as
79/// a precise type, whereas the more generic dyn Fn is what is required.
80///
81/// This trait is provided in part to group the types for a lexical
82/// parser together, enabling simpler type inference and less
83/// turbofish syntax in clients of the lexical analysis.
84pub trait Lexer: std::fmt::Debug {
85 /// The Token type is the type of the token to be returned by the
86 /// Lexer; it is used as part of the result of the [Lexer] parse
87 /// functions.
88 type Token: Sized + std::fmt::Debug;
89
90 /// The State of the stream that is used and returned by the parse
91 /// functions; it must be copy as it is replicated constantly
92 /// throughout the parsing process.
93 ///
94 /// This can be a [crate::StreamCharPos]
95 type State: Sized + Copy + std::fmt::Debug + Default;
96
97 /// The error type returned by the parser functions in the lexical analyzer
98 type Error: LexerError<Self::State>;
99
100 /// This attempts to parse the next token found at the state of
101 /// the [Lexer] stream, by applying the parsers in order.
102 ///
103 /// An error is returned if the token cannot be parsed
104 fn parse<'a>(
105 &'a self,
106 state: Self::State,
107 parsers: &[BoxDynLexerParseFn<'a, Self>],
108 ) -> LexerParseResult<Self::State, Self::Token, Self::Error>;
109
110 /// This creates an iterator over all of the tokens in the [Lexer]
111 /// stream, by applying the parsers in order at the current stream
112 /// position whenever the 'next' method is invoked.
113 ///
114 /// The iterator returns None when the end of stream is reached,
115 /// otherwise it returns a result of the token or an error,
116 /// depending on the success of the parsers.
117 fn iter<'iter>(
118 &'iter self,
119 parsers: &'iter [BoxDynLexerParseFn<'iter, Self>],
120 ) -> Box<dyn Iterator<Item = Result<Self::Token, Self::Error>> + 'iter>;
121}
122
//tp LexerParseResult
/// The return value for a Lexer parse function
///
/// The type parameters are the stream state `S`, the token `T` and the
/// error `E`. The value is:
///
/// * `Ok(Some((state, token)))` - a state paired with a token
///
/// * `Ok(None)` - no state/token pair was produced
///
/// * `Err(error)` - an error of type `E`
///
/// This *could* have been defined as:
///
/// ```ignore
///    pub type LexerParseResult<L: Lexer>
///        = Result<Option<(<L as Lexer>::State, <L as Lexer>::Token)>, <L as Lexer>::Error>;
/// ```
///
/// But then clients that have their type L with a lifetime (which is
/// common) would have a parse result type that must be annotated with
/// that lifetime, even though the actual result *does not* depend on it.
///
/// This causes problems for clients
pub type LexerParseResult<S, T, E> = Result<Option<(S, T)>, E>;

137//tp LexerParseFn
138/// The type of a parse function
139pub type LexerParseFn<L> =
140 fn(
141 lexer: &L,
142 <L as Lexer>::State,
143 char,
144 ) -> LexerParseResult<<L as Lexer>::State, <L as Lexer>::Token, <L as Lexer>::Error>;
145
146//tp BoxDynLexerParseFn
147/// The type of a parse function, when Boxed as a dyn trait
148///
149/// This type can be used in arrays/slices to allow a Lexer to run
150/// through a list of possible token parsers such as:
151///
152/// ```ignore
153/// let parsers = [
154/// Box::new(parse_char_fn) as BoxDynLexerParseFn<OurLexer>
155/// Box::new(parse_value_fn),
156/// Box::new(parse_whitespace_fn),
157/// ];
158/// ```
159///
160/// Note that the use of 'as Box...' is required, as without it type
161/// inference will kick in on the Box::new() to infer parse_char_fn as
162/// a precise type, whereas the more generic dyn Fn is what is required.
163pub type BoxDynLexerParseFn<'a, L> = Box<
164 dyn for<'call> Fn(
165 &'call L,
166 <L as Lexer>::State,
167 char,
168 ) -> LexerParseResult<
169 <L as Lexer>::State,
170 <L as Lexer>::Token,
171 <L as Lexer>::Error,
172 > + 'a,
173>;