Skip to main content

mq_lang/lexer/
token.rs

1use std::fmt::{self, Display, Formatter};
2
3use itertools::Itertools;
4use smol_str::SmolStr;
5
6use crate::{ArenaId, module::ModuleId, number::Number, range::Range};
7#[cfg(feature = "ast-json")]
8use serde::{Deserialize, Serialize};
9
10#[cfg_attr(feature = "ast-json", derive(Serialize, Deserialize))]
11#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)]
12pub enum StringSegment {
13    Text(String, Range),
14    Expr(SmolStr, Range),
15}
16
17impl Display for StringSegment {
18    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
19        match self {
20            StringSegment::Text(text, _) => write!(f, "{}", text),
21            StringSegment::Expr(expr, _) => write!(f, "${{{}}}", expr),
22        }
23    }
24}
25
26fn default_module_id() -> ModuleId {
27    ArenaId::new(0)
28}
29
30#[cfg_attr(feature = "ast-json", derive(Serialize, Deserialize))]
31#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone)]
32pub struct Token {
33    pub range: Range,
34    pub kind: TokenKind,
35    #[cfg_attr(
36        feature = "ast-json",
37        serde(skip_serializing, skip_deserializing, default = "default_module_id")
38    )]
39    pub module_id: ModuleId,
40}
41
42#[cfg_attr(feature = "ast-json", derive(Serialize, Deserialize))]
43#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone)]
44/// Represents the kind of a token in the mq language.
45///
46/// TokenKind variants are sorted alphabetically for maintainability.
47pub enum TokenKind {
48    And,
49    Arrow,
50    As,
51    Convert,
52    Asterisk,
53    BoolLiteral(bool),
54    BytesLiteral(Vec<u8>),
55    Break,
56    Catch,
57    Coalesce,
58    Colon,
59    DoubleColon,
60    DoubleSlashEqual,
61    Comma,
62    Comment(String),
63    Continue,
64    Def,
65    Do,
66    Elif,
67    Else,
68    End,
69    Env(SmolStr),
70    Eof,
71    Equal,
72    EqEq,
73    Fn,
74    Foreach,
75    Gt,
76    Gte,
77    Ident(SmolStr),
78    If,
79    Include,
80    InterpolatedString(Vec<StringSegment>),
81    Import,
82    LBrace,
83    LBracket,
84    Let,
85    LeftShift,
86    Loop,
87    Lt,
88    Lte,
89    Macro,
90    Match,
91    Module,
92    Minus,
93    MinusEqual,
94    NeEq,
95    NewLine,
96    Nodes,
97    None,
98    Not,
99    NumberLiteral(Number),
100    Or,
101    Percent,
102    PercentEqual,
103    Pipe,
104    PipeEqual,
105    Plus,
106    PlusEqual,
107    Question,
108    Quote,
109    RBrace,
110    DoubleDot,
111    RBracket,
112    RightShift,
113    RParen,
114    Selector(SmolStr),
115    Self_,
116    SemiColon,
117    Slash,
118    SlashEqual,
119    StringLiteral(String),
120    StarEqual,
121    Tab(usize),
122    TildeEqual,
123    NotTildeEqual,
124    Try,
125    Unquote,
126    Whitespace(usize),
127    While,
128    LParen,
129    Var,
130}
131
132impl Token {
133    pub fn new(kind: TokenKind) -> Self {
134        Self {
135            kind,
136            range: Range::default(),
137            module_id: default_module_id(),
138        }
139    }
140
141    #[inline(always)]
142    pub fn is_eof(&self) -> bool {
143        matches!(self.kind, TokenKind::Eof)
144    }
145
146    #[inline(always)]
147    pub fn is_selector(&self) -> bool {
148        matches!(self.kind, TokenKind::Selector(_))
149    }
150}
151
152impl Display for Token {
153    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
154        write!(f, "{}", self.kind)
155    }
156}
157
158impl Display for TokenKind {
159    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
160        match &self {
161            TokenKind::And => write!(f, "&&"),
162            TokenKind::Arrow => write!(f, "->"),
163            TokenKind::As => write!(f, "as"),
164            TokenKind::Convert => write!(f, "@"),
165            TokenKind::Or => write!(f, "||"),
166            TokenKind::Not => write!(f, "!"),
167            TokenKind::Asterisk => write!(f, "*"),
168            TokenKind::BoolLiteral(b) => write!(f, "{}", b),
169            TokenKind::BytesLiteral(b) => {
170                write!(f, "b\"")?;
171                for byte in b {
172                    if byte.is_ascii_graphic() && *byte != b'"' && *byte != b'\\' {
173                        write!(f, "{}", *byte as char)?;
174                    } else {
175                        write!(f, "\\x{:02x}", byte)?;
176                    }
177                }
178                write!(f, "\"")
179            }
180            TokenKind::Break => write!(f, "break"),
181            TokenKind::Colon => write!(f, ":"),
182            TokenKind::Comma => write!(f, ","),
183            TokenKind::Continue => write!(f, "continue"),
184            TokenKind::Coalesce => write!(f, "??"),
185            TokenKind::Comment(comment) => write!(f, "# {}", comment.trim()),
186            TokenKind::Def => write!(f, "def"),
187            TokenKind::Do => write!(f, "do"),
188            TokenKind::DoubleColon => write!(f, "::"),
189            TokenKind::DoubleSlashEqual => write!(f, "//="),
190            TokenKind::Elif => write!(f, "elif"),
191            TokenKind::Else => write!(f, "else"),
192            TokenKind::End => write!(f, "end"),
193            TokenKind::Env(env) => write!(f, "${}", env),
194            TokenKind::Eof => write!(f, ""),
195            TokenKind::Equal => write!(f, "="),
196            TokenKind::EqEq => write!(f, "=="),
197            TokenKind::Fn => write!(f, "fn"),
198            TokenKind::Foreach => write!(f, "foreach"),
199            TokenKind::Ident(ident) => write!(f, "{}", ident),
200            TokenKind::If => write!(f, "if"),
201            TokenKind::Include => write!(f, "include"),
202            TokenKind::Import => write!(f, "import"),
203            TokenKind::InterpolatedString(segments) => {
204                write!(f, "{}", segments.iter().join(""))
205            }
206            TokenKind::Lt => write!(f, "<"),
207            TokenKind::Lte => write!(f, "<="),
208            TokenKind::Gt => write!(f, ">"),
209            TokenKind::Gte => write!(f, ">="),
210            TokenKind::LBracket => write!(f, "["),
211            TokenKind::LParen => write!(f, "("),
212            TokenKind::LeftShift => write!(f, "<<"),
213            TokenKind::Let => write!(f, "let"),
214            TokenKind::Loop => write!(f, "loop"),
215            TokenKind::Macro => write!(f, "macro"),
216            TokenKind::Match => write!(f, "match"),
217            TokenKind::Module => write!(f, "module"),
218            TokenKind::Minus => write!(f, "-"),
219            TokenKind::MinusEqual => write!(f, "-="),
220            TokenKind::Slash => write!(f, "/"),
221            TokenKind::SlashEqual => write!(f, "/="),
222            TokenKind::Percent => write!(f, "%"),
223            TokenKind::PercentEqual => write!(f, "%="),
224            TokenKind::NeEq => write!(f, "!="),
225            TokenKind::NewLine => writeln!(f),
226            TokenKind::Nodes => write!(f, "nodes"),
227            TokenKind::None => write!(f, "None"),
228            TokenKind::NumberLiteral(n) => write!(f, "{}", n),
229            TokenKind::Plus => write!(f, "+"),
230            TokenKind::PlusEqual => write!(f, "+="),
231            TokenKind::Pipe => write!(f, "|"),
232            TokenKind::PipeEqual => write!(f, "|="),
233            TokenKind::Quote => write!(f, "quote"),
234            TokenKind::DoubleDot => write!(f, ".."),
235            TokenKind::RBracket => write!(f, "]"),
236            TokenKind::RightShift => write!(f, ">>"),
237            TokenKind::RBrace => write!(f, "}}"),
238            TokenKind::RParen => write!(f, ")"),
239            TokenKind::Selector(selector) => write!(f, "{}", selector),
240            TokenKind::Self_ => write!(f, "self"),
241            TokenKind::SemiColon => write!(f, ";"),
242            TokenKind::StringLiteral(s) => write!(f, "{}", s),
243            TokenKind::StarEqual => write!(f, "*="),
244            TokenKind::Tab(n) => write!(f, "{}", "\t".repeat(*n)),
245            TokenKind::TildeEqual => write!(f, "=~"),
246            TokenKind::NotTildeEqual => write!(f, "!~"),
247            TokenKind::Try => write!(f, "try"),
248            TokenKind::Unquote => write!(f, "unquote"),
249            TokenKind::Catch => write!(f, "catch"),
250            TokenKind::While => write!(f, "while"),
251            TokenKind::Whitespace(n) => write!(f, "{}", " ".repeat(*n)),
252            TokenKind::LBrace => write!(f, "{{"),
253            TokenKind::Question => write!(f, "?"),
254            TokenKind::Var => write!(f, "var"),
255        }
256    }
257}
258
#[cfg(test)]
mod tests {
    //! Display and predicate tests for the lexer token types.

    use super::*;
    use rstest::rstest;

    // Both segment kinds, including the empty-content edge cases.
    #[rstest]
    #[case(
        StringSegment::Text("hello".to_string(), Range::default()),
        "hello"
    )]
    #[case(StringSegment::Expr(SmolStr::new("world"), Range::default()), "${world}")]
    #[case(
        StringSegment::Text("".to_string(), Range::default()),
        ""
    )]
    #[case(StringSegment::Expr(SmolStr::new(""), Range::default()), "${}")]
    fn string_segment_display_works(#[case] segment: StringSegment, #[case] expected: &str) {
        assert_eq!(segment.to_string(), expected);
    }

    // `->` next to the tokens it shares characters or context with.
    #[rstest]
    #[case(TokenKind::Arrow, "->")]
    #[case(TokenKind::Fn, "fn")]
    #[case(TokenKind::Minus, "-")]
    #[case(TokenKind::Gt, ">")]
    fn token_kind_arrow_display(#[case] kind: TokenKind, #[case] expected: &str) {
        assert_eq!(kind.to_string(), expected);
    }

    // Arms whose output depends on format-string escaping or on the payload.
    #[rstest]
    #[case(TokenKind::LBrace, "{")]
    #[case(TokenKind::RBrace, "}")]
    #[case(TokenKind::Eof, "")]
    #[case(TokenKind::NewLine, "\n")]
    #[case(TokenKind::Tab(2), "\t\t")]
    #[case(TokenKind::Whitespace(3), "   ")]
    #[case(TokenKind::Whitespace(0), "")]
    #[case(TokenKind::BoolLiteral(true), "true")]
    #[case(TokenKind::Env(SmolStr::new("HOME")), "$HOME")]
    #[case(TokenKind::Ident(SmolStr::new("foo")), "foo")]
    #[case(TokenKind::Selector(SmolStr::new(".h1")), ".h1")]
    #[case(TokenKind::StringLiteral("hi".to_string()), "hi")]
    #[case(TokenKind::Comment("  note ".to_string()), "# note")]
    #[case(TokenKind::BytesLiteral(vec![]), r#"b"""#)]
    #[case(
        TokenKind::BytesLiteral(vec![b'a', b'"', b'\\', b' ', 0xff]),
        r#"b"a\x22\x5c\x20\xff""#
    )]
    #[case(
        TokenKind::InterpolatedString(vec![
            StringSegment::Text("a".to_string(), Range::default()),
            StringSegment::Expr(SmolStr::new("b"), Range::default()),
        ]),
        "a${b}"
    )]
    #[case(TokenKind::InterpolatedString(vec![]), "")]
    fn token_kind_payload_and_escape_display(#[case] kind: TokenKind, #[case] expected: &str) {
        assert_eq!(kind.to_string(), expected);
    }

    #[test]
    fn token_display_matches_kind_display() {
        let token = Token::new(TokenKind::Arrow);
        assert_eq!(token.to_string(), TokenKind::Arrow.to_string());
    }

    #[test]
    fn token_new_uses_defaults() {
        let token = Token::new(TokenKind::Eof);
        assert_eq!(token.range, Range::default());
        assert_eq!(token.module_id, default_module_id());
    }

    #[test]
    fn token_predicates_follow_kind() {
        let eof = Token::new(TokenKind::Eof);
        assert!(eof.is_eof());
        assert!(!eof.is_selector());

        let selector = Token::new(TokenKind::Selector(SmolStr::new(".h1")));
        assert!(selector.is_selector());
        assert!(!selector.is_eof());
    }
}