1use std::{fmt::Display, ops::Range};
2
3use chumsky::{
4 prelude::*,
5 text::{ident, Character},
6};
7
/// A single lexical token produced by [`lexer`].
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum Token {
    /// Contents of a double-quoted string literal, with escapes already decoded
    /// and the surrounding quotes removed.
    String(String),
    /// An identifier as recognised by chumsky's `text::ident`.
    Ident(String),
    /// A run of "raw identifier" characters (letters, digits and a set of
    /// punctuation such as `-`, `.`, `:`), tried when no other token matches.
    RawIdent(String),

    /// The `unit` keyword.
    Unit,
    /// The `expose` keyword.
    Expose,
    /// The `as` keyword.
    As,

    /// The `@` symbol.
    Attr,
    /// The `~` symbol.
    Tilde,
    /// The `::` symbol.
    DoubleColon,
    /// The `=>` symbol.
    DoubleArrow,
    /// The `,` symbol.
    Comma,

    /// An opening bracket of the given kind.
    OpenDelim(Delim),
    /// A closing bracket of the given kind.
    CloseDelim(Delim),

    /// Placeholder emitted by the lexer's error recovery when input could not
    /// be tokenised; carries a character taken from the input at the recovery
    /// point.
    Err(char),
}
29
/// The kind of bracket carried by [`Token::OpenDelim`] and [`Token::CloseDelim`].
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum Delim {
    /// Parentheses: `(` and `)`.
    Round,
    /// Square brackets: `[` and `]`.
    Square,
    /// Angle brackets: `<` and `>`.
    Arrow,
    /// Curly braces: `{` and `}`.
    Curly,
}
37
38impl Display for Token {
39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 use Delim::{Arrow, Curly, Round, Square};
41 use Token::{
42 As, Attr, CloseDelim, Comma, DoubleArrow, DoubleColon, Err, Expose, Ident, OpenDelim,
43 RawIdent, String, Tilde, Unit,
44 };
45
46 match self {
47 String(s) => f.write_fmt(format_args!("\"{s}\"")),
48 Ident(i) | RawIdent(i) => i.fmt(f),
49 Unit => "unit".fmt(f),
50 Expose => "expose".fmt(f),
51 As => "as".fmt(f),
52 Attr => "@".fmt(f),
53 Tilde => "~".fmt(f),
54 Comma => ",".fmt(f),
55 DoubleColon => "::".fmt(f),
56 DoubleArrow => "=>".fmt(f),
57 OpenDelim(Round) => "(".fmt(f),
58 OpenDelim(Square) => "[".fmt(f),
59 OpenDelim(Arrow) => "<".fmt(f),
60 OpenDelim(Curly) => "{".fmt(f),
61 CloseDelim(Round) => ")".fmt(f),
62 CloseDelim(Square) => "]".fmt(f),
63 CloseDelim(Arrow) => ">".fmt(f),
64 CloseDelim(Curly) => "}".fmt(f),
65 Err(c) => c.fmt(f),
66 }
67 }
68}
69
70pub fn lexer() -> impl Parser<char, Vec<(Token, Range<usize>)>, Error = Simple<char>> {
71 let escape = just('\\').ignore_then(
72 just('\\')
73 .or(just('/'))
74 .or(just('"'))
75 .or(just('b').to('\x08'))
76 .or(just('f').to('\x0C'))
77 .or(just('n').to('\n'))
78 .or(just('r').to('\r'))
79 .or(just('t').to('\t')),
80 );
81
82 let str_lit = filter(|c| *c != '\\' && *c != '"')
83 .or(escape)
84 .repeated()
85 .delimited_by(just('"'), just('"'))
86 .collect();
87
88 let keywords = choice((
89 just("expose").to(Token::Expose),
90 just("as").to(Token::As),
91 just("unit").to(Token::Unit),
92 just('@').to(Token::Attr),
93 just('~').to(Token::Tilde),
94 just("::").to(Token::DoubleColon),
95 just("=>").to(Token::DoubleArrow),
96 just(',').to(Token::Comma),
97 ));
98
99 let delim = choice((
100 just('(').to(Token::OpenDelim(Delim::Round)),
101 just('[').to(Token::OpenDelim(Delim::Square)),
102 just('<').to(Token::OpenDelim(Delim::Arrow)),
103 just('{').to(Token::OpenDelim(Delim::Curly)),
104 just(')').to(Token::CloseDelim(Delim::Round)),
105 just(']').to(Token::CloseDelim(Delim::Square)),
106 just('>').to(Token::CloseDelim(Delim::Arrow)),
107 just('}').to(Token::CloseDelim(Delim::Curly)),
108 ));
109
110 let raw_ident = filter(|c: &char| {
111 matches!(c.to_char(),
112 '!'..='&' | '*'..='+' | '-'..='.' | '0'..=';' | '=' | '?'..='Z' | '^'..='z' | '|')
113 })
114 .repeated()
115 .at_least(1)
116 .collect::<String>();
117
118 let token = choice((
119 keywords,
120 delim,
121 str_lit.map(Token::String),
122 ident().map(Token::Ident),
123 raw_ident.map(Token::RawIdent),
124 ))
125 .padded()
126 .map_with_span(|t, span| (t, span));
127
128 token
129 .clone()
130 .recover_with(skip_parser(
131 token
132 .not()
133 .repeated()
134 .ignore_then(any().rewind())
135 .map_with_span(|c, span| (Token::Err(c), span)),
136 ))
137 .repeated()
138 .padded()
139 .then_ignore(end())
140 .labelled("tokens")
141}