lexer/token.rs
//! This module contains the `Token` struct and the `TokenKind` enum, which
//! represent the tokens generated by the lexer.
use std::fmt::Display;

use serde::Serialize;
use shared::span::Span;

/// Represents a token in the lexer.
///
/// Tokens are the smallest units of a programming language. They represent
/// individual elements such as keywords, identifiers, literals, and symbols.
/// Tokens are used by the lexer to break down the source code into meaningful
/// components that can be processed by the parser.
///
/// Tokens are useful because they provide a structured representation of the
/// source code, allowing for easier analysis, interpretation, and transformation
/// of the code. They serve as the foundation for building compilers, interpreters,
/// and other language processing tools.
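///
/// # Example
///
/// A minimal sketch of pairing a kind with a span (marked `ignore` because how
/// a `Span` is constructed depends on the `shared` crate; a plain struct
/// literal with `start`/`end` fields is assumed here):
///
/// ```ignore
/// let token = Token {
///     kind: TokenKind::Plus,
///     span: Span { start: 3, end: 4 },
/// };
/// assert_eq!(token.to_string(), "Token::Plus ('+', <3:4>)");
/// ```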
#[derive(PartialEq, Debug, Clone)]
pub struct Token {
    pub kind: TokenKind,
    pub span: Span,
}

/// Implements the `Display` trait for the `Token` struct.
/// This allows the `Token` struct to be formatted as a string.
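///
/// For example, an `Eq` token whose span runs from 8 to 10 is rendered as
/// `Token::Eq ('==', <8:10>)`.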
impl Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Token::{:?} ('{}', <{}:{}>)",
            self.kind, self.kind, self.span.start, self.span.end
        )
    }
}

/// Represents a specific variant of a token.
///
/// A `TokenKind` can be one of several variants, such as `Identifier`, `Int`,
/// etc. Variants may contain additional data specific to that kind of token.
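///
/// For example, an integer literal such as `42` would be carried as
/// `TokenKind::Int("42".to_string())`, while the `+` symbol maps to the unit
/// variant `TokenKind::Plus`.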
#[derive(PartialEq, Debug, Clone, Serialize)]
pub enum TokenKind {
    // Special
    // -------
    NewLine,
    /// Represents a character that does not match any other token.
    Illegal(String),
    /// Represents a multi-line comment `/*` that does not have a corresponding `*/`.
    UnterminatedComment,
    /// Represents a string literal that does not have a closing quote.
    UnterminatedString,
    /// Represents the end of the file.
    Eof,

    // Value holders
    // -------------
    Identifier {
        name: String,
    },
    Int(String),
    Float(String),
    String(String),

    // Arithmetic operators
    // --------------------
    Assign, // =
    Plus,   // +
    Minus,  // -
    Mult,   // *
    Div,    // /
    Mod,    // %

    // Comparison operators
    // --------------------
    Lt,    // <
    LtEq,  // <=
    Gt,    // >
    GtEq,  // >=
    Eq,    // ==
    NotEq, // !=

    // Delimiters
    // ----------
    Comma,     // ,
    Semicolon, // ;
    Colon,     // :

    // Brackets
    // --------
    LParen,   // (
    RParen,   // )
    LCurly,   // {
    RCurly,   // }
    LBracket, // [
    RBracket, // ]

    // Keywords
    // --------
    DefineFunction,
    Set,
    True,
    False,
    If,
    Otherwise,
    Return,
    Then,
    End,
    Repeat,
    Times,
    Until,
    Forever,
    Display,

    // Boolean operator keywords
    // -------------------------
    Not,
    And,
    Or,
}

impl TokenKind {
    /// Looks up an identifier and returns the corresponding token kind.
    ///
    /// This function provides a single source of truth for all the keywords in
    /// the language. The lexer module uses it to generate tokens for keywords.
    ///
    /// # Arguments
    ///
    /// * `ident` - The identifier to look up.
    ///
    /// # Returns
    ///
    /// The corresponding token kind for the identifier, or
    /// `TokenKind::Identifier` if `ident` is not a keyword.
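    ///
    /// # Examples
    ///
    /// An illustrative sketch of the mapping (marked `ignore` because the
    /// crate path needed to import `TokenKind` in a doctest is assumed):
    ///
    /// ```ignore
    /// assert_eq!(TokenKind::lookup_ident("set"), TokenKind::Set);
    /// assert_eq!(
    ///     TokenKind::lookup_ident("score"),
    ///     TokenKind::Identifier { name: "score".to_string() },
    /// );
    /// ```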
    pub fn lookup_ident(ident: &str) -> Self {
        match ident {
            "defineFunction" => Self::DefineFunction,
            "set" => Self::Set,
            "if" => Self::If,
            "otherwise" => Self::Otherwise,
            "true" => Self::True,
            "false" => Self::False,
            "return" => Self::Return,
            "then" => Self::Then,
            "end" => Self::End,
            "repeat" => Self::Repeat,
            "times" => Self::Times,
            "until" => Self::Until,
            "forever" => Self::Forever,
            "display" => Self::Display,
            "not" => Self::Not,
            "and" => Self::And,
            "or" => Self::Or,
            _ => Self::Identifier {
                name: ident.to_string(),
            },
        }
    }
}

/// Implements the `Display` trait for `TokenKind`.
/// This allows `TokenKind` to be formatted as a string when using the `write!` macro.
impl Display for TokenKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Match the `TokenKind` variant and assign the corresponding string literal.
        let string_literal = match self {
            Self::Identifier { name } => name,
            Self::Int(num) => num,
            Self::Float(num) => num,
            Self::String(string) => string,
            Self::Illegal(string) => string,
            Self::Assign => "=",
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Mult => "*",
            Self::Div => "/",
            Self::Mod => "%",
            Self::Lt => "<",
            Self::LtEq => "<=",
            Self::Gt => ">",
            Self::GtEq => ">=",
            Self::Eq => "==",
            Self::NotEq => "!=",
            Self::Comma => ",",
            Self::Semicolon => ";",
            Self::Colon => ":",
            Self::LParen => "(",
            Self::RParen => ")",
            Self::LCurly => "{",
            Self::RCurly => "}",
            Self::LBracket => "[",
            Self::RBracket => "]",
            Self::DefineFunction => "defineFunction",
            Self::Set => "set",
            Self::If => "if",
            Self::Otherwise => "otherwise",
            Self::True => "true",
            Self::False => "false",
            Self::Return => "return",
            Self::Then => "then",
            Self::End => "end",
            Self::Repeat => "repeat",
            Self::Times => "times",
            Self::Until => "until",
            Self::Forever => "forever",
            Self::Display => "display",
            Self::Not => "not",
            Self::And => "and",
            Self::Or => "or",
            Self::Eof => "<EOF>",
            Self::UnterminatedComment => "<UnterminatedComment>",
            Self::UnterminatedString => "<UnterminatedString>",
            Self::NewLine => "\\n",
        }
        .to_string();

        // Write the string literal to the formatter.
        write!(f, "{}", string_literal)
    }
}
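
// A minimal sketch of unit tests exercising `lookup_ident` and the `Display`
// implementation for `TokenKind`. It relies only on items defined in this
// file, so it is expected to run with a plain `cargo test`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lookup_ident_maps_keywords_and_identifiers() {
        // Known keywords map to their dedicated variants.
        assert_eq!(TokenKind::lookup_ident("repeat"), TokenKind::Repeat);
        assert_eq!(TokenKind::lookup_ident("display"), TokenKind::Display);
        // Anything else becomes an `Identifier` carrying the original text.
        assert_eq!(
            TokenKind::lookup_ident("counter"),
            TokenKind::Identifier {
                name: "counter".to_string()
            }
        );
    }

    #[test]
    fn token_kind_display_renders_source_text() {
        // Operators and keywords render as their source spelling.
        assert_eq!(TokenKind::Plus.to_string(), "+");
        assert_eq!(TokenKind::DefineFunction.to_string(), "defineFunction");
        // Sentinel kinds render as placeholder markers.
        assert_eq!(TokenKind::Eof.to_string(), "<EOF>");
    }
}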