1use logos::*;
2use crate::InternalString;
3
pub const KW_REQUIRE: &str = "require";

pub const KW_RETURN: &str = "return";
pub const KW_BREAK: &str = "break";
pub const KW_CONTINUE: &str = "continue";
pub const KW_WEIGHT: &str = "weight";
pub const KW_IF: &str = "if";
pub const KW_ELSEIF: &str = "elseif";
pub const KW_ELSE: &str = "else";

pub const KW_TRUE: &str = "true";
pub const KW_FALSE: &str = "false";

pub const KW_TEXT: &str = "text";

pub const KW_EDIT: &str = "edit";

pub const KW_NEG: &str = "neg";
pub const KW_NOT: &str = "not";
pub const KW_EQ: &str = "eq";
pub const KW_NEQ: &str = "neq";
pub const KW_GT: &str = "gt";
pub const KW_GE: &str = "ge";
pub const KW_LT: &str = "lt";
pub const KW_LE: &str = "le";

/// Returns `true` if `kw_name` (the keyword text without its leading `@`)
/// is one of the recognized keyword names defined above.
pub fn is_valid_keyword_name(kw_name: &str) -> bool {
  [
    KW_REQUIRE,
    KW_RETURN, KW_BREAK, KW_CONTINUE, KW_WEIGHT, KW_IF, KW_ELSEIF, KW_ELSE,
    KW_TRUE, KW_FALSE, KW_TEXT, KW_EDIT,
    KW_NEG, KW_NOT,
    KW_EQ, KW_NEQ, KW_GT, KW_GE, KW_LT, KW_LE,
  ]
  .contains(&kw_name)
}
45
46#[derive(Debug, PartialEq)]
47pub struct KeywordInfo {
48 pub name: InternalString,
49 pub is_valid: bool,
50}
51
/// Result of parsing an (unsigned) float literal.
#[derive(PartialEq, Debug)]
pub enum PositiveFloatToken {
  /// Literal parsed successfully into an `f64`.
  Value(f64),
  /// Literal could not be represented as an `f64`.
  OutOfRange,
}
58
/// Result of parsing an (unsigned) integer literal.
#[derive(PartialEq, Debug)]
pub enum PositiveIntegerToken {
  /// Literal parsed successfully into a `u64`.
  Value(u64),
  /// Literal could not be represented as a `u64`.
  OutOfRange,
}
65
/// Result of decoding an escape sequence.
#[derive(PartialEq, Debug)]
pub enum ParsedEscape {
  /// Escape decoded to a single character.
  Char(char),
  /// Escape code character was not a recognized escape.
  InvalidChar(char),
  /// Hex digits did not form a valid Unicode codepoint.
  InvalidUnicode(String),
}
73
/// Tokens produced by the Rant lexer. Matching rules (regexes, literal
/// tokens, priorities, and callbacks) are declared via `logos` attributes.
#[derive(Logos, Debug, PartialEq)]
pub enum RantToken {
  /// Run of plain fragment (word-like) text; also serves as the lexer's
  /// error/fallback variant via `#[error]`.
  #[error]
  #[regex(r"([0-9]+(\.[0-9]+([Ee][+\-]?\d+)?|[Ee][+\-]?\d+)?[\p{L}\-_]|[\w_][\p{L}\-_]|\-[\p{L}\-_])[\w\-_]*", priority = 1)]
  Fragment,

  /// Run of whitespace; `filter_bs` skips it when it starts at input
  /// position 0, otherwise it is emitted.
  #[regex(r"\s+", filter_bs, priority = 2)]
  Whitespace,

  /// Whitespace adjacent to a line break; always skipped.
  #[regex(r"[\r\n]+\s*|\s*[\r\n]+", logos::skip, priority = 3)]
  IgnoredWhitespace,

  /// `-`
  #[token("-", priority = 10)]
  Minus,

  /// `-=`
  #[token("-=", priority = 11)]
  MinusEquals,

  /// `{`
  #[token("{")]
  LeftBrace,

  /// `|`
  #[token("|")]
  VertBar,

  /// `|=`
  #[token("|=")]
  VertBarEquals,

  /// `}`
  #[token("}")]
  RightBrace,

  /// `|>` (pipe operator)
  #[token("|>")]
  PipeOp,

  /// `[]` (piped value)
  #[token("[]")]
  PipeValue,

  /// `[`
  #[token("[")]
  LeftBracket,

  /// `]`
  #[token("]")]
  RightBracket,

  /// `(`
  #[token("(")]
  LeftParen,

  /// `)`
  #[token(")")]
  RightParen,

  /// `<>` (nothing literal)
  #[token("<>")]
  NothingLiteral,

  /// `<`
  #[token("<")]
  LeftAngle,

  /// `>`
  #[token(">")]
  RightAngle,

  /// `:`
  #[token(":")]
  Colon,

  /// `::`
  #[token("::")]
  DoubleColon,

  /// `..`
  #[token("..")]
  DoubleDot,

  /// `**`
  #[token("**")]
  DoubleStar,

  /// `**=`
  #[token("**=")]
  DoubleStarEquals,

  /// `*label*` — labeled temporal spread; payload is the label text
  /// between the asterisks.
  #[regex(r"\*[\w\-_][\w\d\-_]*\*", parse_temporal_spread_label)]
  TemporalLabeled(InternalString),

  /// `*`
  #[token("*")]
  Star,

  /// `*=`
  #[token("*=")]
  StarEquals,

  /// `+`
  #[token("+")]
  Plus,

  /// `+=`
  #[token("+=")]
  PlusEquals,

  /// `=`
  #[token("=")]
  Equals,

  /// `?`
  #[token("?")]
  Question,

  /// `;`
  #[token(";")]
  Semicolon,

  /// Bare `@` (lower priority than `Keyword` so `@name` wins).
  #[token("@", priority = 1)]
  At,

  /// `@name` — keyword token; payload records the name and whether it is
  /// a recognized keyword (see `parse_keyword`). Case-insensitive match.
  #[regex(r"@[a-z0-9_-]+", parse_keyword, priority = 2, ignore(case))]
  Keyword(KeywordInfo),

  /// `/`
  #[token("/")]
  Slash,

  /// `/=`
  #[token("/=")]
  SlashEquals,

  /// `^`
  #[token("^")]
  Caret,

  /// `^=`
  #[token("^=")]
  CaretEquals,

  /// `$`
  #[token("$")]
  Dollar,

  /// `%`
  #[token("%")]
  Percent,

  /// `%=`
  #[token("%=")]
  PercentEquals,

  /// `` ` `` (hint marker)
  #[token("`")]
  Hint,

  /// `~` (sink marker)
  #[token("~")]
  Sink,

  /// `&`
  #[token("&")]
  And,

  /// `&=`
  #[token("&=")]
  AndEquals,

  /// Unsigned integer literal; `parse_integer` flags overflow as
  /// `OutOfRange`.
  #[regex(r"[0-9]+", parse_integer, priority = 2)]
  IntegerPositive(PositiveIntegerToken),

  /// Unsigned float literal (requires a fractional part and/or exponent,
  /// so it outprioritizes `IntegerPositive`).
  #[regex(r"[0-9]+(\.[0-9]+([Ee][+\-]?\d+)?|[Ee][+\-]?\d+)", parse_float, priority = 3)]
  FloatPositive(PositiveFloatToken),

  /// `## ... ##` block comments and `# ...` line comments; always skipped.
  #[regex(r"\s*##([^#]|#[^#])*(##\s*)?", logos::skip, priority = 6)]
  #[regex(r"\s*#([^#][^\r\n]*)?\n?", logos::skip, priority = 5)]
  Comment,

  /// Escape sequences: `\c`, `\xHH`, `\uHHHH`, `\UHHHHHHHH`, and the
  /// variable-length `\U(H...)`. Longer forms carry higher priorities so
  /// they win over the single-char form.
  #[regex(r"\\\S", parse_escape, priority = 10)]
  #[regex(r"\\x\S\S", parse_byte_escape, priority = 11)]
  #[regex(r"\\u\S\S\S\S", parse_unicode_escape, priority = 11)]
  #[regex(r"\\U\S\S\S\S\S\S\S\S", parse_unicode_escape, priority = 11)]
  #[regex(r"\\U\(\S+\)", parse_unicode_unsized_escape, priority = 12)]
  Escape(ParsedEscape),

  /// Double-quoted string literal; `""` inside is an escaped quote.
  #[regex(r#""(""|[^"])*""#, parse_string_literal)]
  StringLiteral(InternalString),

  /// String literal that reached end of input without a closing quote.
  #[regex(r#""(""|[^"])*"#)]
  UnterminatedStringLiteral,
}
283
284fn parse_temporal_spread_label(lex: &mut Lexer<RantToken>) -> InternalString {
285 let slice = lex.slice();
286 InternalString::from(&slice[1 .. slice.len() - 1])
287}
288
289fn parse_string_literal(lex: &mut Lexer<RantToken>) -> InternalString {
290 let literal = lex.slice();
291 let literal_content = &literal[1..literal.len() - 1];
292 let mut string_content = InternalString::new();
293 let mut prev_quote = false;
294 for c in literal_content.chars() {
295 match c {
296 '"' => {
297 if prev_quote {
298 prev_quote = false;
299 string_content.push('"');
300 } else {
301 prev_quote = true;
302 }
303 },
304 c => {
305 string_content.push(c)
306 }
307 }
308 }
309 string_content
310}
311
312fn parse_keyword(lex: &mut Lexer<RantToken>) -> KeywordInfo {
313 let kwd_literal = lex.slice();
314 let kwd_content = &kwd_literal[1..];
315 KeywordInfo {
316 is_valid: is_valid_keyword_name(kwd_content),
317 name: InternalString::from(kwd_content),
318 }
319}
320
321fn filter_bs(lex: &mut Lexer<RantToken>) -> Filter<()> {
323 if lex.span().start > 0 {
324 return Filter::Emit(())
325 }
326 Filter::Skip
327}
328
329fn parse_escape(lex: &mut Lexer<RantToken>) -> ParsedEscape {
330 let slice = lex.slice();
331 ParsedEscape::Char(match slice.chars().nth(1).unwrap() {
332 'r' => '\r',
333 'n' => '\n',
334 't' => '\t',
335 '0' => '\0',
336 's' => ' ',
337 c @ (
338 '(' | ')' | '[' | ']' | '{' | '}' | '<' | '>' |
339 '\\' | '@' | ':' | ';' | '|' | '"' |
340 '+' | '-' | '*' | '/' | '$' | '%' | '`' | '~' | '^'
341 ) => c,
342 c => return ParsedEscape::InvalidChar(c)
343 })
344}
345
346fn parse_byte_escape(lex: &mut Lexer<RantToken>) -> ParsedEscape {
347 let slice = &lex.slice()[2..];
348 let c = u8::from_str_radix(slice, 16).ok().map(char::from);
349 match c {
350 Some(c) => ParsedEscape::Char(c),
351 None => ParsedEscape::InvalidUnicode(slice.to_owned()),
352 }
353}
354
355fn parse_unicode_escape(lex: &mut Lexer<RantToken>) -> ParsedEscape {
356 let slice = &lex.slice()[2..];
357 let c = u32::from_str_radix(slice, 16).ok().and_then(char::from_u32);
358 match c {
359 Some(c) => ParsedEscape::Char(c),
360 None => ParsedEscape::InvalidUnicode(slice.to_owned()),
361 }
362}
363
364fn parse_unicode_unsized_escape(lex: &mut Lexer<RantToken>) -> ParsedEscape {
365 let len = lex.slice().len();
366 let codepoint_len = len - 4;
367 let slice = &lex.slice()[3..(len - 1)];
368 if codepoint_len > 8 { return ParsedEscape::InvalidUnicode(slice.to_owned()) }
369 let c = u32::from_str_radix(slice, 16).ok().and_then(char::from_u32);
370 match c {
371 Some(c) => ParsedEscape::Char(c),
372 None => ParsedEscape::InvalidUnicode(slice.to_owned()),
373 }
374}
375
376fn parse_float(lex: &mut Lexer<RantToken>) -> PositiveFloatToken {
377 let slice = lex.slice();
378 match slice.parse() {
379 Ok(f) => PositiveFloatToken::Value(f),
380 Err(_) => PositiveFloatToken::OutOfRange,
381 }
382}
383
384fn parse_integer(lex: &mut Lexer<RantToken>) -> PositiveIntegerToken {
385 let slice = lex.slice();
386 match slice.parse() {
387 Ok(i) => PositiveIntegerToken::Value(i),
388 Err(_) => PositiveIntegerToken::OutOfRange,
389 }
390}