1use logos::{Logos, Span};
13use std::num::{IntErrorKind, ParseIntError};
14
/// The category of failure the lexer can report.
#[derive(Default, Debug, Clone, PartialEq)]
pub enum LexicalErrorKind {
    /// No token rule matched at the current position. Marked `#[default]`
    /// so logos can fall back to it for unmatched input (this type is
    /// wired in via `#[logos(error = LexicalErrorKind)]` on `Token`).
    #[default]
    InvalidToken,
    /// An integer literal matched a rule but failed to parse into `i64`
    /// (e.g. overflow); carries the underlying `ParseIntError`.
    InvalidInteger(ParseIntError),
}
22
23impl From<ParseIntError> for LexicalErrorKind {
24 fn from(value: ParseIntError) -> Self {
25 Self::InvalidInteger(value)
26 }
27}
28
/// A lexing failure, carrying enough context for `Display` to point the
/// user at the offending location in the source text.
#[derive(Debug, Clone, PartialEq)]
pub struct LexicalError<'a> {
    /// What went wrong.
    kind: LexicalErrorKind,
    /// The full source text being lexed; used to compute line/column and
    /// to quote the offending slice in the error message.
    input: &'a str,
    /// Byte range of the offending text within `input`.
    span: Span,
}
36
37impl<'a> std::fmt::Display for LexicalError<'a> {
38 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
39 let line = self.input[..self.span.start]
40 .chars()
41 .filter(|&ch| ch == '\n')
42 .count()
43 + 1;
44
45 let column = self.span.start - self.input[..self.span.start].rfind("\n").unwrap_or(0);
46
47 let position = format!("line {}, column {}", line, column);
48
49 match &self.kind {
50 LexicalErrorKind::InvalidToken => write!(
51 f,
52 "Invalid token \"{}\" at {}",
53 &self.input[self.span.start..self.span.end],
54 position
55 )?,
56 LexicalErrorKind::InvalidInteger(inner) => write!(
57 f,
58 "Invalid number {} at {}: {}",
59 &self.input[self.span.start..self.span.end],
60 position,
61 match inner.kind() {
62 IntErrorKind::PosOverflow | IntErrorKind::NegOverflow => "overflow",
63 _ => "unknown",
64 }
65 )?,
66 };
67
68 Ok(())
69 }
70}
71
72fn string_from_lexer<'a>(lex: &mut logos::Lexer<'a, Token<'a>>) -> &'a str {
73 let slice = lex.slice();
74 &slice[1..slice.len() - 1]
75}
76
/// The token set recognized by the lexer: comments, punctuation, literals,
/// keywords of a protobuf-style grammar, and identifiers.
#[derive(Clone, Debug, PartialEq, Logos)]
#[logos(error = LexicalErrorKind)]
// Inter-token whitespace is skipped. NOTE(review): `\s` already covers
// `\t`, `\n` and `\f`, so the extra classes are redundant but harmless.
#[logos(skip r"[\s\t\n\f]+")]
pub enum Token<'a> {
    // `//` up to the end of the line.
    // NOTE(review): confirm `allow_greedy` is a regex option supported by
    // the pinned logos version; it is not part of the core attribute set.
    #[regex(r"//.*", allow_greedy = true)]
    SingleLineComment(&'a str),

    // `/* ... */` — the classic non-nesting C-comment regex, which stops
    // at the first valid `*/`.
    #[regex(r"\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/")]
    MultiLineComment(&'a str),

    // --- Punctuation ---

    #[token("=")]
    Eq,

    #[token(":")]
    Colon,

    #[token(";")]
    Semicolon,

    #[token(",")]
    Comma,

    #[token(".")]
    Period,

    #[token("(")]
    OpenPth,

    #[token(")")]
    ClosePth,

    #[token("[")]
    OpenBracket,

    #[token("]")]
    CloseBracket,

    #[token("{")]
    OpenBrace,

    #[token("}")]
    CloseBrace,

    #[token("<")]
    OpenAngle,

    #[token(">")]
    CloseAngle,

    // --- Literals ---

    // `true` / `false`, mapped directly to a bool payload.
    #[token("true", |_| true)]
    #[token("false", |_| false)]
    Boolean(bool),

    // Decimal (optionally negative) or hex (`0x…`, 1-16 digits) 64-bit
    // integers. Both callbacks return `Result<i64, ParseIntError>`; parse
    // failures surface as `LexicalErrorKind::InvalidInteger` through the
    // `From<ParseIntError>` impl above.
    #[regex(r"-?[0-9]+", |lex| lex.slice().parse())]
    #[regex(r"0x[0-9a-fA-F]{1,16}", |lex| i64::from_str_radix(&lex.slice()[2..], 16))]
    Integer(i64),

    // --- Keywords ---

    #[token("to")]
    To,

    #[token("max")]
    Max,

    #[token("syntax")]
    Syntax,

    #[token("option")]
    Option,

    #[token("package")]
    Package,

    #[token("import")]
    Import,

    #[token("service")]
    Service,

    #[token("rpc")]
    Rpc,

    #[token("stream")]
    Stream,

    #[token("returns")]
    Returns,

    #[token("message")]
    Message,

    #[token("oneof")]
    OneOf,

    #[token("extend")]
    Extend,

    #[token("enum")]
    Enum,

    #[token("reserved")]
    Reserved,

    #[token("extensions")]
    Extensions,

    #[token("optional")]
    Optional,

    #[token("required")]
    Required,

    #[token("repeated")]
    Repeated,

    #[token("map")]
    Map,

    // Single- or double-quoted string with escaped quotes allowed and raw
    // newlines rejected; `string_from_lexer` strips the surrounding quotes.
    #[regex(r#"'((?:[^'\n]|(?:\\\'))*)'"#, string_from_lexer)]
    #[regex(r#""((?:[^"\n]|(?:\\\"))*)""#, string_from_lexer)]
    String(&'a str),

    // Catch-all identifier. `priority = 0` lets the keyword `#[token]`
    // rules above win when both match the same text.
    #[regex(r"[a-zA-Z_][a-zA-Z_0-9]*", priority = 0)]
    Ident(&'a str),
}
202
203impl<'a> std::fmt::Display for Token<'a> {
204 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
205 write!(f, "{:?}", self)
206 }
207}
208
/// Streaming adapter over the logos lexer that yields spanned tokens
/// (see the `Iterator` impl below).
pub struct Lexer<'input> {
    /// Underlying logos iterator producing `(Result<Token, _>, Span)` pairs.
    inner: logos::SpannedIter<'input, Token<'input>>,
}
213
214impl<'input> Lexer<'input> {
215 pub fn new(src: &'input str) -> Self {
216 Self {
217 inner: Token::lexer(src).spanned(),
218 }
219 }
220}
221
/// A located token in the `(start, token, end)` triple shape commonly
/// consumed by parser generators, or a lexical error.
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
224
225impl<'input> Iterator for Lexer<'input> {
226 type Item = Spanned<Token<'input>, usize, LexicalError<'input>>;
227
228 fn next(&mut self) -> Option<Self::Item> {
229 let (tok, span) = self.inner.next()?;
230
231 Some(
232 tok.map(|tok| (span.start, tok, span.end))
233 .map_err(|kind| LexicalError {
234 kind,
235 input: self.inner.source(),
236 span,
237 }),
238 )
239 }
240}