1#![forbid(unsafe_code)]
2#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
3
4use std::fmt::{self, Display, Formatter};
7use std::str::FromStr;
8use std::sync::OnceLock;
9
10use span::{Span, Spanned};
11
12pub mod macros;
13pub mod span;
14
/// Binding strength of a token, stored as a `u8`.
///
/// Values compare by their numeric level, so a larger value orders after a
/// smaller one.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Precedence(u8);

impl Precedence {
    /// Wraps a raw precedence level.
    pub fn new(prec: u8) -> Precedence {
        Precedence(prec)
    }

    /// The smallest representable level (`0`).
    pub fn lowest() -> Precedence {
        Precedence(u8::MIN)
    }

    /// The largest representable level (`255`).
    pub fn highest() -> Precedence {
        Precedence(u8::MAX)
    }

    /// Returns the level directly above this one.
    ///
    /// The result saturates: calling `next` on [`Precedence::highest`] yields
    /// `highest` again rather than overflowing the underlying `u8`.
    pub fn next(&self) -> Precedence {
        Precedence(self.0.saturating_add(1))
    }
}
35
36#[derive(Clone, Debug, PartialEq, serde::Deserialize, serde::Serialize)]
39#[serde(rename_all = "lowercase")]
40pub enum Token {
41 As(Span),
43 Class(Span),
44 Declare(Span),
45 Else(Span),
46 Fn(Span),
47 For(Span),
48 If(Span),
49 Import(Span),
50 Is(Span),
51 Instance(Span),
52 Match(Span),
53 Pub(Span),
54 Type(Span),
55 When(Span),
56 Then(Span),
57 With(Span),
58 Where(Span),
59
60 Add(Span),
62 And(Span),
63 Concat(Span),
64 Div(Span),
65 Dot(Span),
66 Eq(Span),
67 Ne(Span),
68 Ge(Span),
69 Gt(Span),
70 Le(Span),
71 Lt(Span),
72 Mod(Span),
73 Mul(Span),
74 Or(Span),
75 Sub(Span),
76
77 ArrowL(Span),
79 ArrowR(Span),
80 Assign(Span),
81 BackSlash(Span),
82 BraceL(Span),
83 BraceR(Span),
84 BracketL(Span),
85 BracketR(Span),
86 Colon(Span),
87 ColonColon(Span),
88 Comma(Span),
89 CommentL(Span),
90 CommentR(Span),
91 DotDot(Span),
92 HashTag(Span),
93 In(Span),
94 Let(Span),
95 Rec(Span),
96 ParenL(Span),
97 ParenR(Span),
98 Pipe(Span),
99 Question(Span),
100 SemiColon(Span),
101 Whitespace(Span),
102 WhitespaceNewline(Span),
103
104 Bool(bool, Span),
106 Float(f64, Span),
107 Int(u64, Span),
108 Null(Span),
109 String(String, Span),
110
111 HttpsUrl(String, Span),
113 Ident(String, Span),
114
115 Eof(Span),
117}
118
119#[derive(Debug, thiserror::Error)]
120pub enum LexicalError {
121 #[error("Unexpected token {0}")]
122 UnexpectedToken(Span),
123 #[error("invalid {kind} literal `{text}`: {error}")]
124 InvalidLiteral {
125 kind: &'static str,
126 text: String,
127 error: String,
128 span: Span,
129 },
130 #[error("internal lexer error: {0}")]
131 Internal(String),
132}
133
134impl Token {
135 pub fn tokenize(input: &str) -> Result<Tokens, LexicalError> {
136 let mut line = 1;
137 let mut column = 1;
138 let mut tokens = Vec::new();
139
140 let re = Token::regex()?;
141 for capture in re.captures_iter(input) {
142 let Some(lexeme) = capture.get(0).map(|m| m.as_str()) else {
143 return Err(LexicalError::Internal(
144 "regex capture missing group 0".into(),
145 ));
146 };
147 let begin_line = line;
148 let begin_column = column;
149 column += lexeme.chars().count();
150 let span = Span::new(begin_line, begin_column, line, column);
151 if lexeme == "\n" {
152 line += 1;
153 column = 1;
154 }
155
156 let token =
163 if capture.name("As").is_some() {
165 Token::As(span)
166 } else if capture.name("Class").is_some() {
167 Token::Class(span)
168 } else if capture.name("Declare").is_some() {
169 Token::Declare(span)
170 } else if capture.name("Else").is_some() {
171 Token::Else(span)
172 } else if capture.name("Fn").is_some() {
173 Token::Fn(span)
174 } else if capture.name("For").is_some() {
175 Token::For(span)
176 } else if capture.name("If").is_some() {
177 Token::If(span)
178 } else if capture.name("Import").is_some() {
179 Token::Import(span)
180 } else if capture.name("Is").is_some() {
181 Token::Is(span)
182 } else if capture.name("Instance").is_some() {
183 Token::Instance(span)
184 } else if capture.name("Match").is_some() {
185 Token::Match(span)
186 } else if capture.name("Pub").is_some() {
187 Token::Pub(span)
188 } else if capture.name("Type").is_some() {
189 Token::Type(span)
190 } else if capture.name("When").is_some() {
191 Token::When(span)
192 } else if capture.name("In").is_some() {
193 Token::In(span)
194 } else if capture.name("Then").is_some() {
195 Token::Then(span)
196 } else if capture.name("With").is_some() {
197 Token::With(span)
198 } else if capture.name("Where").is_some() {
199 Token::Where(span)
200 }
201
202 else if capture.name("ArrowL").is_some() {
204 Token::ArrowL(span)
205 } else if capture.name("ArrowR").is_some() {
206 Token::ArrowR(span)
207 } else if capture.name("BackSlash").is_some() {
208 Token::BackSlash(span)
209 } else if capture.name("CommentL").is_some() {
210 Token::CommentL(span)
211 } else if capture.name("CommentR").is_some() {
212 Token::CommentR(span)
213 } else if capture.name("BraceL").is_some() {
214 Token::BraceL(span)
215 } else if capture.name("BraceR").is_some() {
216 Token::BraceR(span)
217 } else if capture.name("BracketL").is_some() {
218 Token::BracketL(span)
219 } else if capture.name("BracketR").is_some() {
220 Token::BracketR(span)
221 } else if capture.name("ColonColon").is_some() {
222 Token::ColonColon(span)
223 } else if capture.name("Colon").is_some() {
224 Token::Colon(span)
225 } else if capture.name("Comma").is_some() {
226 Token::Comma(span)
227 } else if capture.name("DotDot").is_some() {
228 Token::DotDot(span)
229 } else if capture.name("HashTag").is_some() {
230 Token::HashTag(span)
231 } else if capture.name("In").is_some() {
232 Token::In(span)
233 } else if capture.name("Let").is_some() {
234 Token::Let(span)
235 } else if capture.name("Rec").is_some() {
236 Token::Rec(span)
237 } else if capture.name("ParenL").is_some() {
238 Token::ParenL(span)
239 } else if capture.name("ParenR").is_some() {
240 Token::ParenR(span)
241 } else if capture.name("Pipe").is_some() {
242 Token::Pipe(span)
243 } else if capture.name("Question").is_some() {
244 Token::Question(span)
245 } else if capture.name("SemiColon").is_some() {
246 Token::SemiColon(span)
247 } else if capture.name("Whitespace").is_some() {
248 Token::Whitespace(span)
249 } else if capture.name("WhitespaceNewline").is_some() {
250 Token::WhitespaceNewline(span)
251 }
252
253 else if capture.name("Concat").is_some() {
255 Token::Concat(span)
256 } else if capture.name("Add").is_some() {
257 Token::Add(span)
258 } else if capture.name("And").is_some() {
259 Token::And(span)
260 } else if capture.name("Div").is_some() {
261 Token::Div(span)
262 } else if capture.name("Dot").is_some() {
263 Token::Dot(span)
264 } else if capture.name("Equal").is_some() {
265 Token::Eq(span)
266 } else if capture.name("NotEqual").is_some() {
267 Token::Ne(span)
268 } else if capture.name("GreaterThanEq").is_some() {
269 Token::Ge(span)
270 } else if capture.name("GreaterThan").is_some() {
271 Token::Gt(span)
272 } else if capture.name("LessThanEq").is_some() {
273 Token::Le(span)
274 } else if capture.name("LessThan").is_some() {
275 Token::Lt(span)
276 } else if capture.name("Mod").is_some() {
277 Token::Mod(span)
278 } else if capture.name("Mul").is_some() {
279 Token::Mul(span)
280 } else if capture.name("Or").is_some() {
281 Token::Or(span)
282 } else if capture.name("Sub").is_some() {
283 Token::Sub(span)
284 } else if capture.name("Assign").is_some() {
285 Token::Assign(span) }
287
288 else if let Some(m) = capture.name("Bool") {
290 let text = m.as_str();
291 let v = bool::from_str(text).map_err(|e| LexicalError::InvalidLiteral {
292 kind: "bool",
293 text: text.to_string(),
294 error: e.to_string(),
295 span,
296 })?;
297 Token::Bool(v, span)
298 } else if let Some(m) = capture.name("Float") {
299 let text = m.as_str();
300 let v = f64::from_str(text).map_err(|e| LexicalError::InvalidLiteral {
301 kind: "float",
302 text: text.to_string(),
303 error: e.to_string(),
304 span,
305 })?;
306 Token::Float(v, span)
307 } else if let Some(m) = capture.name("Int") {
308 let text = m.as_str();
309 let v = u64::from_str(text).map_err(|e| LexicalError::InvalidLiteral {
310 kind: "int",
311 text: text.to_string(),
312 error: e.to_string(),
313 span,
314 })?;
315 Token::Int(v, span)
316 } else if capture.name("Null").is_some() {
317 Token::Null(span)
318 } else if let Some(m) = capture.name("DoubleString") {
319 Token::String(m.as_str().to_string(), span)
320 } else if let Some(m) = capture.name("SingleString") {
321 Token::String(m.as_str().to_string(), span)
322 }
323
324 else if let Some(m) = capture.name("HttpsUrl") {
326 Token::HttpsUrl(m.as_str().to_string(), span)
327 }
328 else if let Some(m) = capture.name("Ident") {
329 Token::Ident(m.as_str().to_string(), span)
330 }
331
332 else {
334 return Err(LexicalError::UnexpectedToken(span));
335 };
336 tokens.push(token)
337 }
338
339 Ok(Tokens {
341 items: tokens
342 .into_iter()
343 .filter(|token| !matches!(*token, Token::Whitespace(..)))
344 .collect(),
345 eof: Span::new(line, column, line, column),
346 })
347 }
348
349 pub fn regex() -> Result<&'static regex::Regex, LexicalError> {
355 static TOKEN_REGEX: OnceLock<Result<regex::Regex, String>> = OnceLock::new();
356 let compiled = TOKEN_REGEX.get_or_init(|| {
357 regex::Regex::from_str(concat!(
358 r"(?P<As>\bas\b)|",
360 r"(?P<Class>\bclass\b)|",
361 r"(?P<Declare>\bdeclare\b)|",
362 r"(?P<Else>\belse\b)|",
363 r"(?P<Fn>\bfn\b)|",
364 r"(?P<For>\bfor\b)|",
365 r"(?P<If>\bif\b)|",
366 r"(?P<Import>\bimport\b)|",
367 r"(?P<Is>\bis\b)|",
368 r"(?P<Instance>\binstance\b)|",
369 r"(?P<Match>\bmatch\b)|",
370 r"(?P<Pub>\bpub\b)|",
371 r"(?P<Type>\btype\b)|",
372 r"(?P<When>\bwhen\b)|",
373 r"(?P<Then>\bthen\b)|",
374 r"(?P<With>\bwith\b)|",
375 r"(?P<Where>\bwhere\b)|",
376 r"(?P<ArrowL><-|←)|",
378 r"(?P<ArrowR>->|→)|",
379 r"(?P<BackSlash>\\|λ)|",
380 r"(?P<CommentL>\{-)|",
381 r"(?P<CommentR>-\})|",
382 r"(?P<BraceL>\{)|",
383 r"(?P<BraceR>\})|",
384 r"(?P<BracketL>\[)|",
385 r"(?P<BracketR>\])|",
386 r"(?P<ColonColon>::)|", r"(?P<Colon>:)|",
388 r"(?P<Comma>,)|",
389 r"(?P<DotDot>\.\.)|",
390 r"(?P<HashTag>\#)|",
391 r"(?P<In>\bin\b)|", r"(?P<Let>\blet\b)|", r"(?P<Rec>\brec\b)|",
394 r"(?P<LambdaR>->)|",
395 r"(?P<ParenL>\()|",
396 r"(?P<ParenR>\))|",
397 r"(?P<Question>\?)|",
398 r"(?P<SemiColon>;)|",
399 r"(?P<Whitespace>( |\t))|",
400 r"(?P<WhitespaceNewline>(\n|\r))|",
401 r"(?P<Concat>\+\+)|",
403 r"(?P<Add>\+)|",
404 r"(?P<And>&&)|",
405 r"(?P<Div>/)|",
406 r"(?P<Dot>\.)|",
407 r"(?P<Equal>==)|",
408 r"(?P<Assign>=)|", r"(?P<NotEqual>!=)|",
410 r"(?P<LessThanEq><=)|",
411 r"(?P<LessThan><)|",
412 r"(?P<GreaterThanEq>>=)|",
413 r"(?P<GreaterThan>>)|",
414 r"(?P<Mod>%)|",
415 r"(?P<Mul>\*)|",
416 r"(?P<Or>\|\|)|",
417 r"(?P<Pipe>\|)|",
418 r"(?P<Sub>-)|",
419 r"(?P<Bool>\b(true|false)\b)|",
421 r"(?P<Float>[0-9]+\.[0-9]+)|",
422 r"(?P<Int>[0-9]+)|",
423 r"(?P<Null>\bnull\b)|",
424 r#""(?P<DoubleString>(\\"|[^"])*)"|"#,
425 r#"'(?P<SingleString>(\\'|[^'])*)'|"#,
426 r"(?P<HttpsUrl>https://[^\s]+)|",
428 r"(?P<Ident>[_a-zA-Z]([_a-zA-Z]|[0-9])*)|",
430 r"(.)",
432 ))
433 .map_err(|e| e.to_string())
434 });
435 match compiled {
436 Ok(re) => Ok(re),
437 Err(msg) => Err(LexicalError::Internal(format!(
438 "failed to compile token regex: {msg}"
439 ))),
440 }
441 }
442
443 pub fn precedence(&self) -> Precedence {
444 use Token::*;
445
446 match self {
447 Or(..) => Precedence(1),
448 And(..) => Precedence(2),
449 Eq(..) | Ne(..) | Lt(..) | Le(..) | Gt(..) | Ge(..) => Precedence(3),
450 Add(..) | Sub(..) | Concat(..) => Precedence(4),
451 Mul(..) | Div(..) | Mod(..) => Precedence(5),
452 Ident(..) | HttpsUrl(..) => Precedence::highest(),
453 _ => Precedence::lowest(),
454 }
455 }
456
457 pub fn is_whitespace(&self) -> bool {
458 matches!(self, Token::Whitespace(..) | Token::WhitespaceNewline(..))
459 }
460}
461
462impl Spanned for Token {
463 fn span(&self) -> &Span {
464 use Token::*;
465
466 match self {
467 As(span, ..) => span,
469 Class(span, ..) => span,
470 Declare(span, ..) => span,
471 Else(span, ..) => span,
472 Fn(span, ..) => span,
473 For(span, ..) => span,
474 If(span, ..) => span,
475 Import(span, ..) => span,
476 Is(span, ..) => span,
477 Instance(span, ..) => span,
478 Match(span, ..) => span,
479 Pub(span, ..) => span,
480 Type(span, ..) => span,
481 When(span, ..) => span,
482 Then(span, ..) => span,
483 With(span, ..) => span,
484 Where(span, ..) => span,
485
486 ArrowL(span, ..) => span,
488 ArrowR(span, ..) => span,
489 Assign(span, ..) => span,
490 BackSlash(span, ..) => span,
491 BraceL(span, ..) => span,
492 BraceR(span, ..) => span,
493 BracketL(span, ..) => span,
494 BracketR(span, ..) => span,
495 Colon(span, ..) => span,
496 ColonColon(span, ..) => span,
497 Comma(span, ..) => span,
498 CommentL(span, ..) => span,
499 CommentR(span, ..) => span,
500 Dot(span, ..) => span,
501 DotDot(span, ..) => span,
502 HashTag(span, ..) => span,
503 In(span, ..) => span,
504 Let(span, ..) => span,
505 Rec(span, ..) => span,
506 ParenL(span, ..) => span,
507 ParenR(span, ..) => span,
508 Pipe(span, ..) => span,
509 Question(span, ..) => span,
510 SemiColon(span, ..) => span,
511 Whitespace(span, ..) => span,
512 WhitespaceNewline(span, ..) => span,
513
514 Add(span, ..) => span,
516 And(span, ..) => span,
517 Concat(span, ..) => span,
518 Div(span, ..) => span,
519 Eq(span, ..) => span,
520 Ne(span, ..) => span,
521 Ge(span, ..) => span,
522 Gt(span, ..) => span,
523 Le(span, ..) => span,
524 Lt(span, ..) => span,
525 Mod(span, ..) => span,
526 Mul(span, ..) => span,
527 Or(span, ..) => span,
528 Sub(span, ..) => span,
529
530 Bool(_, span, ..) => span,
532 Float(_, span, ..) => span,
533 Int(_, span, ..) => span,
534 Null(span, ..) => span,
535 String(_, span, ..) => span,
536
537 HttpsUrl(_, span, ..) => span,
539 Ident(_, span, ..) => span,
540
541 Eof(span) => span,
543 }
544 }
545
546 fn span_mut(&mut self) -> &mut Span {
547 use Token::*;
548
549 match self {
550 As(span, ..) => span,
552 Class(span, ..) => span,
553 Declare(span, ..) => span,
554 Else(span, ..) => span,
555 Fn(span, ..) => span,
556 For(span, ..) => span,
557 If(span, ..) => span,
558 Import(span, ..) => span,
559 Is(span, ..) => span,
560 Instance(span, ..) => span,
561 Match(span, ..) => span,
562 Pub(span, ..) => span,
563 Type(span, ..) => span,
564 When(span, ..) => span,
565 Then(span, ..) => span,
566 With(span, ..) => span,
567 Where(span, ..) => span,
568
569 ArrowL(span, ..) => span,
571 ArrowR(span, ..) => span,
572 Assign(span, ..) => span,
573 BackSlash(span, ..) => span,
574 BraceL(span, ..) => span,
575 BraceR(span, ..) => span,
576 BracketL(span, ..) => span,
577 BracketR(span, ..) => span,
578 Colon(span, ..) => span,
579 ColonColon(span, ..) => span,
580 Comma(span, ..) => span,
581 CommentL(span, ..) => span,
582 CommentR(span, ..) => span,
583 Dot(span, ..) => span,
584 DotDot(span, ..) => span,
585 HashTag(span, ..) => span,
586 In(span, ..) => span,
587 Let(span, ..) => span,
588 Rec(span, ..) => span,
589 ParenL(span, ..) => span,
590 ParenR(span, ..) => span,
591 Pipe(span, ..) => span,
592 Question(span, ..) => span,
593 SemiColon(span, ..) => span,
594 Whitespace(span, ..) => span,
595 WhitespaceNewline(span, ..) => span,
596
597 Add(span, ..) => span,
599 And(span, ..) => span,
600 Concat(span, ..) => span,
601 Div(span, ..) => span,
602 Eq(span, ..) => span,
603 Ne(span, ..) => span,
604 Ge(span, ..) => span,
605 Gt(span, ..) => span,
606 Le(span, ..) => span,
607 Lt(span, ..) => span,
608 Mod(span, ..) => span,
609 Mul(span, ..) => span,
610 Or(span, ..) => span,
611 Sub(span, ..) => span,
612
613 Bool(_, span, ..) => span,
615 Float(_, span, ..) => span,
616 Int(_, span, ..) => span,
617 Null(span, ..) => span,
618 String(_, span, ..) => span,
619
620 HttpsUrl(_, span, ..) => span,
622 Ident(_, span, ..) => span,
623
624 Eof(span) => span,
626 }
627 }
628}
629
630impl Display for Token {
631 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
632 use Token::*;
633
634 match self {
635 As(..) => write!(f, "as"),
637 Class(..) => write!(f, "class"),
638 Declare(..) => write!(f, "declare"),
639 Else(..) => write!(f, "else"),
640 Fn(..) => write!(f, "fn"),
641 For(..) => write!(f, "for"),
642 If(..) => write!(f, "if"),
643 Import(..) => write!(f, "import"),
644 Is(..) => write!(f, "is"),
645 Instance(..) => write!(f, "instance"),
646 Match(..) => write!(f, "match"),
647 Pub(..) => write!(f, "pub"),
648 Type(..) => write!(f, "type"),
649 When(..) => write!(f, "when"),
650 Then(..) => write!(f, "then"),
651 With(..) => write!(f, "with"),
652 Where(..) => write!(f, "where"),
653
654 ArrowL(..) => write!(f, "<-"),
656 ArrowR(..) => write!(f, "->"),
657 Assign(..) => write!(f, "="),
658 BackSlash(..) => write!(f, "\\"),
659 BraceL(..) => write!(f, "{{"),
660 BraceR(..) => write!(f, "}}"),
661 BracketL(..) => write!(f, "["),
662 BracketR(..) => write!(f, "]"),
663 Colon(..) => write!(f, ":"),
664 ColonColon(..) => write!(f, "::"),
665 Comma(..) => write!(f, ","),
666 CommentL(..) => write!(f, "{{-"),
667 CommentR(..) => write!(f, "-}}"),
668 Dot(..) => write!(f, "."),
669 DotDot(..) => write!(f, ".."),
670 HashTag(..) => write!(f, "#"),
671 In(..) => write!(f, "in"),
672 Let(..) => write!(f, "let"),
673 Rec(..) => write!(f, "rec"),
674 ParenL(..) => write!(f, "("),
675 ParenR(..) => write!(f, ")"),
676 Pipe(..) => write!(f, "|"),
677 Question(..) => write!(f, "?"),
678 SemiColon(..) => write!(f, ";"),
679 Whitespace(..) => write!(f, " "),
680 WhitespaceNewline(..) => writeln!(f),
681
682 Add(..) => write!(f, "+"),
684 And(..) => write!(f, "&&"),
685 Concat(..) => write!(f, "++"),
686 Div(..) => write!(f, "/"),
687 Eq(..) => write!(f, "=="),
688 Ne(..) => write!(f, "!="),
689 Gt(..) => write!(f, ">"),
690 Ge(..) => write!(f, ">="),
691 Lt(..) => write!(f, "<"),
692 Le(..) => write!(f, "<="),
693 Mod(..) => write!(f, "%"),
694 Mul(..) => write!(f, "*"),
695 Or(..) => write!(f, "||"),
696 Sub(..) => write!(f, "-"),
697
698 Bool(x, ..) => write!(f, "{}", x),
700 Float(x, ..) => write!(f, "{}", x),
701 Int(x, ..) => write!(f, "{}", x),
702 Null(..) => write!(f, "null"),
703 String(x, ..) => write!(f, "{}", x),
704
705 HttpsUrl(url, ..) => write!(f, "{}", url),
707 Ident(ident, ..) => write!(f, "{}", ident),
708
709 Eof(..) => write!(f, "EOF"),
711 }
712 }
713}
714
715#[derive(Clone, Debug, PartialEq, serde::Deserialize, serde::Serialize)]
716pub struct Tokens {
717 pub items: Vec<Token>,
718 pub eof: Span,
719}