Skip to main content

lutra_compiler/parser/lexer/
token.rs

1use std::fmt::Write;
2
3use crate::pr;
4
5#[derive(Clone, PartialEq, Eq)]
6pub struct Token {
7    pub kind: TokenKind,
8    pub span: SpanInSource,
9}
10
/// A range within one source, stored as a start offset plus a length.
///
/// The source itself is not identified here; `with_source_id` attaches an id and
/// produces a `crate::Span`.
#[derive(Clone, PartialEq, Eq)]
pub struct SpanInSource {
    /// Offset at which the range begins.
    pub start: u32,
    /// Number of units covered by the range; the exclusive end is `start + len`.
    pub len: u16,
}
16
17#[derive(Clone, PartialEq, Debug)]
18pub enum TokenKind {
19    Ident(String),
20    Keyword(&'static str),
21    Literal(pr::Literal),
22
23    Interpolation(char, String),
24
25    /// single-char control tokens
26    Control(char),
27
28    ArrowThin,   // ->
29    ArrowFat,    // =>
30    Eq,          // ==
31    Ne,          // !=
32    Gte,         // >=
33    Lte,         // <=
34    RegexSearch, // ~=
35    And,         // &&
36    Or,          // ||
37    Coalesce,    // ??
38    DivInt,      // //
39    Pow,         // **
40    PathSep,     // ::
41    Range,       // ..
42
43    DocComment(String),
44    DocCommentSelf(String),
45
46    NewLine,
47    Comment(String),
48}
49
50// This is here because Literal::Float(f64) does not implement Hash, so we cannot simply derive it.
51// There are reasons for that, but chumsky::Error needs Hash for the TokenKind, so it can deduplicate
52// tokens in error.
53// So this hack could lead to duplicated tokens in error messages. Oh no.
54#[allow(clippy::derived_hash_with_manual_eq)]
55impl std::hash::Hash for TokenKind {
56    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
57        core::mem::discriminant(self).hash(state);
58    }
59}
60
61impl std::cmp::Eq for TokenKind {}
62
63impl std::fmt::Display for TokenKind {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        match self {
66            TokenKind::Ident(s) => {
67                if s.is_empty() {
68                    // FYI this shows up in errors
69                    write!(f, "an identifier")
70                } else {
71                    write!(f, "{s}")
72                }
73            }
74            TokenKind::Keyword(s) => write!(f, "keyword {s}"),
75            TokenKind::Literal(lit) => write!(f, "{lit}"),
76            TokenKind::Control(c) => write!(f, "{c}"),
77
78            TokenKind::ArrowThin => f.write_str("->"),
79            TokenKind::ArrowFat => f.write_str("=>"),
80            TokenKind::Eq => f.write_str("=="),
81            TokenKind::Ne => f.write_str("!="),
82            TokenKind::Gte => f.write_str(">="),
83            TokenKind::Lte => f.write_str("<="),
84            TokenKind::RegexSearch => f.write_str("~="),
85            TokenKind::And => f.write_str("&&"),
86            TokenKind::Or => f.write_str("||"),
87            TokenKind::Coalesce => f.write_str("??"),
88            TokenKind::DivInt => f.write_str("//"),
89            TokenKind::Pow => f.write_str("**"),
90            TokenKind::PathSep => f.write_str("::"),
91            TokenKind::Range => f.write_str(".."),
92
93            TokenKind::Interpolation(c, s) => {
94                write!(f, "{c}\"{s}\"")
95            }
96            TokenKind::DocComment(text) => writeln!(f, "## {text}"),
97            TokenKind::DocCommentSelf(text) => writeln!(f, "#! {text}"),
98
99            TokenKind::Comment(text) => writeln!(f, "# {text}"),
100            TokenKind::NewLine => f.write_char('\n'),
101        }
102    }
103}
104
105impl SpanInSource {
106    pub fn with_source_id(self, source_id: u16) -> crate::Span {
107        crate::Span {
108            source_id,
109            start: self.start,
110            len: self.len,
111        }
112    }
113
114    pub fn end(&self) -> u32 {
115        self.start + self.len as u32
116    }
117}
118
119impl std::fmt::Debug for Token {
120    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
121        write!(f, "{:?}: {:?}", self.span, self.kind)
122    }
123}
124
125impl std::fmt::Debug for SpanInSource {
126    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
127        write!(f, "{}..{}", self.start, self.end())
128    }
129}
130
131impl chumsky::Span for SpanInSource {
132    type Context = ();
133
134    type Offset = u32;
135
136    fn new(_: Self::Context, range: std::ops::Range<Self::Offset>) -> Self {
137        Self {
138            start: range.start,
139            len: (range.end - range.start) as u16,
140        }
141    }
142
143    fn context(&self) -> Self::Context {}
144
145    fn start(&self) -> Self::Offset {
146        self.start
147    }
148
149    fn end(&self) -> Self::Offset {
150        self.start + self.len as u32
151    }
152}
153
#[cfg(test)]
mod test {
    use insta::assert_snapshot;
    use pr::Literal;

    use super::*;

    /// Wraps `s` in a text literal; the tests snapshot its `Display` output.
    fn text(s: &str) -> Literal {
        Literal::Text(s.to_string())
    }

    /// Quote selection for text containing single and/or double quotes.
    #[test]
    fn test_string_quoting() {
        let plain = text("hello").to_string();
        assert_snapshot!(plain, @r###""hello""###);

        let single_quote = text(r#"he's nice"#).to_string();
        assert_snapshot!(single_quote, @r#""he's nice""#);

        let double_quotes = text(r#"he said "what up""#).to_string();
        assert_snapshot!(double_quotes, @r#""""he said "what up"""""#);

        let both_quotes = text(r#"he said "what's up""#).to_string();
        assert_snapshot!(both_quotes, @r#""""he said "what's up"""""#);

        let quote_runs = text(r#" single' three double""" four double"""" "#).to_string();
        assert_snapshot!(
            quote_runs,
            @r#"""""" single' three double""" four double"""" """"""#
        );

        let leading_quote =
            text(r#""Starts with a double quote and ' contains a single quote"#).to_string();
        assert_snapshot!(
            leading_quote,
            @r#"""""Starts with a double quote and ' contains a single quote""""#
        );
    }

    /// Backslashes and real line breaks in the text are escaped in the rendered output.
    #[test]
    fn test_string_escapes() {
        let backslash_n = text(r#"hello\nworld"#).to_string();
        assert_snapshot!(backslash_n, @r###""hello\\nworld""###);

        let backslash_t = text(r#"hello\tworld"#).to_string();
        assert_snapshot!(backslash_t, @r###""hello\\tworld""###);

        // The raw string below contains a real line break followed by 12 spaces.
        let line_break = text(r#"hello
            world"#).to_string();
        assert_snapshot!(line_break, @r###""hello\n            world""###);
    }

    #[test]
    fn test_raw_string_quoting() {
        // TODO: add some test for escapes
        let plain = text("hello").to_string();
        assert_snapshot!(plain, @r#""hello""#);
    }
}