lexer/
token.rs

//! This module contains the `Token` struct and the `TokenKind` enum, which
//! represent the tokens generated by the lexer.
3use std::fmt::Display;
4
5use serde::Serialize;
6use shared::span::Span;
7
8/// Represents a token in the lexer.
9///
10/// Tokens are the smallest units of a programming language. They represent
11/// individual elements such as keywords, identifiers, literals, and symbols.
12/// Tokens are used by the lexer to break down the source code into meaningful
13/// components that can be processed by the parser.
14///
15/// Tokens are useful because they provide a structured representation of the
16/// source code, allowing for easier analysis, interpretation, and transformation
17/// of the code. They serve as the foundation for building compilers, interpreters,
18/// and other language processing tools.
19#[derive(PartialEq, Debug, Clone)]
20pub struct Token {
21    pub kind: TokenKind,
22    pub span: Span,
23}
24
25/// Implements the `Display` trait for the `Token` struct.
26/// This allows the `Token` struct to be formatted as a string.
27impl Display for Token {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        write!(
30            f,
31            "Token::{:?} ('{}', <{}:{}>)",
32            self.kind, self.kind, self.span.start, self.span.end
33        )
34    }
35}
36
37/// Represents a specific variant of a token.
38///
39/// A `TokenKind` can be one of several variants, such as `Identifier`, `Int`,
40/// etc. Variants may contain additional data specific to that kind of token.
41#[derive(PartialEq, Debug, Clone, Serialize)]
42pub enum TokenKind {
43    // Special
44    // -------
45    NewLine,
46    /// Represents a character that does not match any other token.
47    Illegal(String),
48    /// Represents a multi-line comment `/*` that does not have a corresponding `*/`.
49    UnterminatedComment,
50    /// Represents a string literal that does not have a closing quote.
51    UnterminatedString,
52    /// Represents the end of the file.
53    Eof,
54
55    // Value holders
56    // -------------
57    Identifier {
58        name: String,
59    },
60    Int(String),
61    Float(String),
62    String(String),
63
64    // Arithmetic operators
65    // --------------------
66    Assign, // =
67    Plus,   // +
68    Minus,  // -
69    Mult,   // *
70    Div,    // /
71    Mod,    // %
72
73    // Comparison operators
74    // --------------------
75    Lt,    // <
76    LtEq,  // <=
77    Gt,    // >
78    GtEq,  // >=
79    Eq,    // ==
80    NotEq, // !=
81
82    // Delimiters
83    // ----------
84    Comma,     // ,
85    Semicolon, // ;
86    Colon,     // :
87
88    // Brackets
89    // --------
90    LParen,   // (
91    RParen,   // )
92    LCurly,   // {
93    RCurly,   // }
94    LBracket, // [
95    RBracket, // ]
96
97    // Keywords
98    // --------
99    DefineFunction,
100    Set,
101    True,
102    False,
103    If,
104    Otherwise,
105    Return,
106    Then,
107    End,
108    Repeat,
109    Times,
110    Until,
111    Forever,
112    Display,
113
114    // Boolean operator keywords
115    // -------------------------
116    Not,
117    And,
118    Or,
119}
120
121impl TokenKind {
122    /// Looks up an identifier and returns the corresponding token kind.
123    ///
124    /// This function serves as a mapping, providing a single point of truth defining all
125    /// the keywords in the language. The lexer module uses this function to generate
126    /// tokens for keywords.
127    ///
128    /// # Arguments
129    ///
130    /// * `ident` - The identifier to look up.
131    ///
132    /// # Returns
133    ///
134    /// The corresponding token kind for the identifier.
135    pub fn lookup_ident(ident: &str) -> Self {
136        match ident {
137            "defineFunction" => Self::DefineFunction,
138            "set" => Self::Set,
139            "if" => Self::If,
140            "otherwise" => Self::Otherwise,
141            "true" => Self::True,
142            "false" => Self::False,
143            "return" => Self::Return,
144            "then" => Self::Then,
145            "end" => Self::End,
146            "repeat" => Self::Repeat,
147            "times" => Self::Times,
148            "until" => Self::Until,
149            "forever" => Self::Forever,
150            "display" => Self::Display,
151            "not" => Self::Not,
152            "and" => Self::And,
153            "or" => Self::Or,
154            _ => Self::Identifier {
155                name: ident.to_string(),
156            },
157        }
158    }
159}
160
161/// Implements the `Display` trait for `TokenKind`.
162/// This allows `TokenKind` to be formatted as a string when using the `write!` macro.
163impl Display for TokenKind {
164    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
165        // Match the `TokenKind` variant and assign the corresponding string literal.
166        let string_literal = match self {
167            Self::Identifier { name } => name,
168            Self::Int(num) => num,
169            Self::Float(num) => num,
170            Self::String(string) => string,
171            Self::Illegal(string) => string,
172            Self::Assign => "=",
173            Self::Plus => "+",
174            Self::Minus => "-",
175            Self::Mult => "*",
176            Self::Div => "/",
177            Self::Mod => "%",
178            Self::Lt => "<",
179            Self::LtEq => "<=",
180            Self::Gt => ">",
181            Self::GtEq => ">=",
182            Self::Eq => "==",
183            Self::NotEq => "!=",
184            Self::Comma => ",",
185            Self::Semicolon => ";",
186            Self::Colon => ":",
187            Self::LParen => "(",
188            Self::RParen => ")",
189            Self::LCurly => "{",
190            Self::RCurly => "}",
191            Self::LBracket => "[",
192            Self::RBracket => "]",
193            Self::DefineFunction => "defineFunction",
194            Self::Set => "set",
195            Self::If => "if",
196            Self::Otherwise => "otherwise",
197            Self::True => "true",
198            Self::False => "false",
199            Self::Return => "return",
200            Self::Then => "then",
201            Self::End => "end",
202            Self::Repeat => "repeat",
203            Self::Times => "times",
204            Self::Until => "until",
205            Self::Forever => "forever",
206            Self::Display => "display",
207            Self::Not => "not",
208            Self::And => "and",
209            Self::Or => "or",
210            Self::Eof => "<EOF>",
211            Self::UnterminatedComment => "<UnterminatedComment>",
212            Self::UnterminatedString => "<UnterminatedString>",
213            Self::NewLine => "\\n",
214        }
215        .to_string();
216
217        // Write the string literal to the formatter.
218        write!(f, "{}", string_literal)
219    }
220}