1use logos::{internal::LexerInternal, Lexer, Logos};
2
3#[derive(Clone)]
4pub struct Extras<'a> {
5 pub arena: &'a bumpalo::Bump,
6}
7
8#[derive(Logos, Debug, PartialEq)]
9#[logos(extras = Extras<'s>)]
10pub enum Token<'a> {
11 #[token("[")]
12 BracketOpen,
13
14 #[token("]")]
15 BracketClose,
16
17 #[token("{")]
18 BraceOpen,
19
20 #[token("}")]
21 BraceClose,
22
23 #[token("(")]
24 ParenOpen,
25
26 #[token(")")]
27 ParenClose,
28
29 #[token(":")]
30 Colon,
31
32 #[token("=")]
33 Equal,
34
35 #[token("!")]
36 Exclam,
37
38 #[token("...")]
39 Ellipsis,
40
41 #[regex(r"\$[_a-zA-Z][_0-9a-zA-Z]*", |lex| &lex.slice()[1..])]
42 VariableName(&'a str),
43
44 #[regex(r"@[_a-zA-Z][_0-9a-zA-Z]*", |lex| &lex.slice()[1..])]
45 DirectiveName(&'a str),
46
47 #[regex(r"[_a-zA-Z][_0-9a-zA-Z]*", |lex| lex.slice())]
48 Name(&'a str),
49
50 #[regex(r"-?([1-9][0-9]*|0)[.][0-9]+([eE][+-]?[0-9]+)?")]
51 #[regex("-?([1-9][0-9]*|0)[eE][+-]?[0-9]+")]
52 Float(&'a str),
53
54 #[regex(r"-?([1-9][0-9]*|0)")]
55 Integer(&'a str),
56
57 #[regex(r#"""?"?"#, parse_string)]
58 String(&'a str),
59
60 #[error]
61 #[regex(r"([ ,\t\n\r\f]+|#[^\n\r]*)+", logos::skip)]
62 Error,
63
64 End,
66}
67
68#[derive(Logos, Debug, PartialEq)]
69pub(crate) enum BlockPart {
70 #[regex(r#"[^"\\\r\n]+"#)]
71 #[regex(r#""+"#)]
72 Text,
73
74 #[regex(r"(\r|\n|\r\n)[\t ]*")]
75 Newline,
76
77 #[regex(r#"\\""""#)]
78 EscapedEndBlock,
79
80 #[regex(r#"\\."#)]
81 EscapedSequence,
82
83 #[token(r#"""""#)]
84 EndBlock,
85
86 #[error]
87 Error,
88}
89
90#[derive(Logos, Debug, PartialEq)]
91pub(crate) enum StringPart {
92 #[regex(r#"[^\n\r\\"]+"#)]
93 Text,
94
95 #[regex(r"\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]")]
96 EscapedCodepoint,
97
98 #[token(r#"\""#)]
99 EscapedQuote,
100 #[token(r#"\\"#)]
101 EscapedBackslash,
102 #[token(r#"\/"#)]
103 EscapedSlash,
104 #[token(r#"\b"#)]
105 EscapedBackspace,
106 #[token(r#"\f"#)]
107 EscapedLinefeed,
108 #[token(r#"\n"#)]
109 EscapedNewline,
110 #[token(r#"\r"#)]
111 EscapedReturn,
112 #[token(r#"\t"#)]
113 EscapedTab,
114
115 #[token("\"")]
116 EndString,
117
118 #[error]
119 Error,
120}
121
122#[inline]
123fn lex_string<'a>(lex: &mut Lexer<'a, Token<'a>>, mut output: String) -> Option<&'a str> {
124 let mut sublex = StringPart::lexer(lex.remainder());
125 while let Some(token) = sublex.next() {
126 match token {
127 StringPart::Error => break,
128 StringPart::Text => output.push_str(sublex.slice()),
129 StringPart::EscapedQuote => output.push('"'),
130 StringPart::EscapedBackslash => output.push('\\'),
131 StringPart::EscapedSlash => output.push('/'),
132 StringPart::EscapedBackspace => output.push(8 as char),
133 StringPart::EscapedLinefeed => output.push(12 as char),
134 StringPart::EscapedNewline => output.push('\n'),
135 StringPart::EscapedReturn => output.push('\r'),
136 StringPart::EscapedTab => output.push('\t'),
137 StringPart::EscapedCodepoint => {
138 use lexical_core::*;
139 const FORMAT: u128 = NumberFormatBuilder::hexadecimal();
140 const OPTIONS: ParseIntegerOptions = ParseIntegerOptions::new();
141 output.push(
142 parse_with_options::<_, FORMAT>(sublex.slice()[2..].as_bytes(), &OPTIONS)
143 .ok()
144 .and_then(std::char::from_u32)?,
145 );
146 }
147 StringPart::EndString => {
148 lex.bump_unchecked(sublex.span().end);
149 return Some(lex.extras.arena.alloc(output));
150 }
151 }
152 }
153 None
154}
155
156#[inline]
157fn lex_block_string<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<&'a str> {
158 let mut output = String::new();
159 let mut sublex = BlockPart::lexer(lex.remainder());
160 let mut min_indent: usize = usize::MAX;
161 while let Some(token) = sublex.next() {
162 match token {
163 BlockPart::EscapedSequence | BlockPart::Text => output.push_str(sublex.slice()),
164 BlockPart::EscapedEndBlock => output.push_str("\"\"\""),
165 BlockPart::Newline => {
166 let mut slice = &sublex.slice()[1..];
167 if !slice.is_empty() && &slice[0..1] == "\n" {
168 slice = &slice[1..];
169 };
170 let indent = slice.len();
171 if indent > 0 && indent < min_indent {
172 min_indent = indent;
173 }
174 output.push('\n');
175 output.push_str(slice);
176 }
177 BlockPart::EndBlock => {
178 lex.bump_unchecked(sublex.span().end);
179 if min_indent == usize::MAX {
180 min_indent = 0;
181 }
182 let mut lines = output.lines();
183 let mut output = String::with_capacity(output.len());
184 if let Some(first) = lines.next() {
185 let stripped = first.trim();
186 if !stripped.is_empty() {
187 output.push_str(stripped);
188 output.push('\n');
189 }
190 }
191 let mut last_line = 0;
192 for line in lines {
193 last_line = output.len();
194 if line.len() > min_indent {
195 output.push_str(&line[min_indent..]);
196 }
197 output.push('\n');
198 }
199 if output[last_line..].trim().is_empty() {
200 output.truncate(last_line);
201 }
202 return Some(lex.extras.arena.alloc(output));
203 }
204 BlockPart::Error => break,
205 }
206 }
207 None
208}
209
210#[inline]
211fn parse_string<'a>(lex: &mut Lexer<'a, Token<'a>>) -> Option<&'a str> {
212 match lex.slice() {
213 r#""""# => Some(""),
214 r#"""""# => lex_block_string(lex),
215 "\"" => {
216 let remainder = lex.remainder();
219 for (i, c) in remainder.char_indices() {
220 match c {
221 '\n' | '\r' => return None,
222 '\\' => {
223 lex.bump_unchecked(i);
224 return lex_string(lex, remainder[0..i].to_string());
225 }
226 '"' => {
227 lex.bump_unchecked(i + 1);
228 return Some(&remainder[0..i]);
229 }
230 _ => {}
231 }
232 }
233 None
234 }
235 _ => None,
236 }
237}
238
#[cfg(test)]
mod tests {
    use super::{Extras, Token};
    use bumpalo::Bump;
    use logos::Logos;

    /// Lexes `source` to exhaustion and returns every token produced.
    fn lex_all<'a>(arena: &'a Bump, source: &'a str) -> Vec<Token<'a>> {
        Token::lexer_with_extras(source, Extras { arena }).collect()
    }

    /// Returns only the first token produced for `source`, if any.
    fn lex_first<'a>(arena: &'a Bump, source: &'a str) -> Option<Token<'a>> {
        Token::lexer_with_extras(source, Extras { arena }).next()
    }

    #[test]
    fn empty() {
        let arena = Bump::new();
        assert!(lex_all(&arena, "").is_empty());
        assert!(lex_all(&arena, ",, # comment\n").is_empty());
    }

    #[test]
    fn symbols() {
        let arena = Bump::new();
        assert_eq!(
            lex_all(&arena, "[]{}()=:...!"),
            vec![
                Token::BracketOpen,
                Token::BracketClose,
                Token::BraceOpen,
                Token::BraceClose,
                Token::ParenOpen,
                Token::ParenClose,
                Token::Equal,
                Token::Colon,
                Token::Ellipsis,
                Token::Exclam,
            ]
        );
    }

    #[test]
    fn names() {
        let arena = Bump::new();
        assert_eq!(
            lex_all(&arena, "hello world"),
            vec![Token::Name("hello"), Token::Name("world")]
        );
        assert_eq!(
            lex_all(&arena, "# comment\n hello"),
            vec![Token::Name("hello")]
        );
    }

    #[test]
    fn variables() {
        let arena = Bump::new();
        assert_eq!(lex_all(&arena, "$var"), vec![Token::VariableName("var")]);
    }

    #[test]
    fn directives() {
        let arena = Bump::new();
        assert_eq!(
            lex_all(&arena, "@directive"),
            vec![Token::DirectiveName("directive")]
        );
    }

    #[test]
    fn integers() {
        let arena = Bump::new();
        assert_eq!(
            lex_all(&arena, "1 -1 123 -123 0"),
            vec![
                Token::Integer("1"),
                Token::Integer("-1"),
                Token::Integer("123"),
                Token::Integer("-123"),
                Token::Integer("0"),
            ]
        );
    }

    #[test]
    fn floats() {
        let arena = Bump::new();
        assert_eq!(
            lex_all(&arena, "1.0 -10.10 -10.10E10 1.1e-1 1e1 0.0"),
            vec![
                Token::Float("1.0"),
                Token::Float("-10.10"),
                Token::Float("-10.10E10"),
                Token::Float("1.1e-1"),
                Token::Float("1e1"),
                Token::Float("0.0"),
            ]
        );
    }

    #[test]
    fn strings() {
        let arena = Bump::new();

        // Plain, empty and escaped single-line strings.
        assert_eq!(
            lex_all(&arena, "\"hello world\""),
            vec![Token::String("hello world")]
        );
        assert_eq!(lex_all(&arena, "\"\""), vec![Token::String("")]);
        assert_eq!(
            lex_all(&arena, "\"hello \\\" \\n world\""),
            vec![Token::String("hello \" \n world")]
        );

        // Block strings.
        assert_eq!(
            lex_all(&arena, "\"\"\"hello block\"\"\""),
            vec![Token::String("hello block\n")]
        );
        assert_eq!(lex_all(&arena, "\"\"\"\"\"\""), vec![Token::String("")]);
        assert_eq!(
            lex_all(&arena, "\"\"\"hello\n\r\t #test\n \\\"\"\" block\"\"\""),
            vec![Token::String("hello\n\n #test\n\"\"\" block\n")]
        );
    }

    #[test]
    fn bad_strings() {
        let arena = Bump::new();
        // Only the first token is checked; what follows the error is not
        // part of the expectation.
        assert_eq!(lex_first(&arena, "\"\\ \""), Some(Token::Error));
        assert_eq!(lex_first(&arena, "\"\n\""), Some(Token::Error));
        assert_eq!(lex_first(&arena, "\"\r\""), Some(Token::Error));
    }
}