lucia_lang/compiler/
token.rs

1//! The parsed token generated by the lexer.
2
3use std::fmt;
4
5use crate::utils::Location;
6
7use super::lexer::LexerError;
8
9/// Enum representing common lexeme types.
10#[derive(Debug, Clone, PartialEq)]
11pub enum TokenKind {
12    // Multi-char tokens:
13    /// "if"
14    If,
15    /// "else"
16    Else,
17    /// "loop"
18    Loop,
19    /// "while"
20    While,
21    /// "for"
22    For,
23    /// "in"
24    In,
25    /// "break"
26    Break,
27    /// "continue"
28    Continue,
29    /// "return"
30    Return,
31    /// "throw"
32    Throw,
33    /// "global"
34    Global,
35    /// "import"
36    Import,
37    /// "as"
38    As,
39    /// "is"
40    Is,
41    /// "not"
42    Not,
43    /// "and"
44    And,
45    /// "or"
46    Or,
47    /// "try"
48    Try,
49    /// "fn"
50    Fn,
51    /// "do"
52    Do,
53    /// "null"
54    Null,
55    /// "true"
56    True,
57    /// "false"
58    False,
59
60    // Two-char tokens:
61    /// "::"
62    DoubleColon,
63    /// "=="
64    Eq,
65    /// "!="
66    NotEq,
67    /// "<="
68    LtEq,
69    /// ">="
70    GtEq,
71    /// "+="
72    AddAssign,
73    /// "-="
74    SubAssign,
75    /// "*="
76    MulAssign,
77    /// "/="
78    DivAssign,
79    /// "%="
80    ModAssign,
81
82    // One-char tokens:
83    /// ","
84    Comma,
85    /// "."
86    Dot,
87    /// "("
88    OpenParen,
89    /// ")"
90    CloseParen,
91    /// "{"
92    OpenBrace,
93    /// "}"
94    CloseBrace,
95    /// "["
96    OpenBracket,
97    /// "]"
98    CloseBracket,
99    /// "#"
100    Pound,
101    /// "?"
102    Question,
103    /// ":"
104    Colon,
105    /// "="
106    Assign,
107    /// "<"
108    Lt,
109    /// ">"
110    Gt,
111    /// "|"
112    VBar,
113    /// "+"
114    Add,
115    /// "-"
116    Sub,
117    /// "*"
118    Mul,
119    /// "/"
120    Div,
121    /// "%"
122    Mod,
123
124    // other
125    /// End of line (`\n`)
126    EOL,
127    /// "// comment"
128    LineComment,
129    /// "/* block comment */"
130    ///
131    /// Block comments can be recursive, so the sequence like `/* /* */`
132    /// will not be considered terminated and will result in a parsing error.
133    BlockComment,
134    /// Any whitespace characters sequence.
135    Whitespace,
136    /// Ident
137    Ident(String),
138    /// "12", "1.0e-40", ""123"". See `LiteralKind` for more details.
139    Literal(LiteralKind),
140    /// Unknown token, not expected by the lexer, e.g. "№"
141    Unknown(char),
142}
143
144impl fmt::Display for TokenKind {
145    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
146        match self {
147            Self::If => write!(f, "If (if)"),
148            Self::Else => write!(f, "Else (else)"),
149            Self::Loop => write!(f, "Loop (loop)"),
150            Self::While => write!(f, "While (while)"),
151            Self::For => write!(f, "For (for)"),
152            Self::In => write!(f, "In (in)"),
153            Self::Break => write!(f, "Break (break)"),
154            Self::Continue => write!(f, "Continue (continue)"),
155            Self::Return => write!(f, "Return (return)"),
156            Self::Throw => write!(f, "Throw (throw)"),
157            Self::Global => write!(f, "Global (global)"),
158            Self::Import => write!(f, "Import (import)"),
159            Self::As => write!(f, "As (as)"),
160            Self::Is => write!(f, "Is (is)"),
161            Self::Not => write!(f, "Not (not)"),
162            Self::And => write!(f, "And (and)"),
163            Self::Or => write!(f, "Or (or)"),
164            Self::Try => write!(f, "Try (try)"),
165            Self::Fn => write!(f, "Fn (fn)"),
166            Self::Do => write!(f, "Do (do)"),
167            Self::Null => write!(f, "Null (null)"),
168            Self::True => write!(f, "True (true)"),
169            Self::False => write!(f, "False (false)"),
170            Self::DoubleColon => write!(f, "DoubleColon (::)"),
171            Self::Eq => write!(f, "Eq (==)"),
172            Self::NotEq => write!(f, "NotEq (!=)"),
173            Self::LtEq => write!(f, "LtEq (<=)"),
174            Self::GtEq => write!(f, "GtEq (>=)"),
175            Self::AddAssign => write!(f, "AddAssign (+=)"),
176            Self::SubAssign => write!(f, "SubAssign (-=)"),
177            Self::MulAssign => write!(f, "MulAssign (*=)"),
178            Self::DivAssign => write!(f, "DivAssign (/=)"),
179            Self::ModAssign => write!(f, "ModAssign (%=)"),
180            Self::Comma => write!(f, "Comma (,)"),
181            Self::Dot => write!(f, "Dot (.)"),
182            Self::OpenParen => write!(f, "OpenParen (())"),
183            Self::CloseParen => write!(f, "CloseParen ())"),
184            Self::OpenBrace => write!(f, "OpenBrace ({{)"),
185            Self::CloseBrace => write!(f, "CloseBrace (}})"),
186            Self::OpenBracket => write!(f, "OpenBracket ([)"),
187            Self::CloseBracket => write!(f, "CloseBracket (])"),
188            Self::Pound => write!(f, "Pound (#)"),
189            Self::Question => write!(f, "Question (?)"),
190            Self::Colon => write!(f, "Colon (:)"),
191            Self::Assign => write!(f, "Assign (=)"),
192            Self::Lt => write!(f, "Lt (<)"),
193            Self::Gt => write!(f, "Gt (>)"),
194            Self::VBar => write!(f, "VBar (|)"),
195            Self::Add => write!(f, "Add (+)"),
196            Self::Sub => write!(f, "Sub (-)"),
197            Self::Mul => write!(f, "Mul (*)"),
198            Self::Div => write!(f, "Div (/)"),
199            Self::Mod => write!(f, "Mod (%)"),
200            Self::EOL => write!(f, "EOL (\\n)"),
201            Self::LineComment => write!(f, "LineComment (// ...)"),
202            Self::BlockComment => write!(f, "BlockComment (/* ... */)"),
203            Self::Whitespace => write!(f, "Whitespace ( )"),
204            Self::Ident(v) => write!(f, "Ident ({})", v),
205            Self::Literal(v) => write!(f, "Literal ({})", v),
206            Self::Unknown(v) => write!(f, "Unknown({})", v),
207        }
208    }
209}
210
211/// Enum representing literal types, included wrong literal like unterminated string.
212#[derive(Debug, Clone, PartialEq)]
213pub enum LiteralKind {
214    /// "12", "0o100", "0b110"
215    Int(Result<i64, LexerError>),
216    /// "12.34", "0b100.100"
217    Float(Result<f64, LexerError>),
218    /// ""abc"", ""abc"
219    Str(Result<String, LexerError>),
220}
221
222impl fmt::Display for LiteralKind {
223    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
224        match self {
225            LiteralKind::Int(v) => match v {
226                Ok(v) => write!(f, "{}", v),
227                Err(v) => write!(f, "{}", v),
228            },
229            LiteralKind::Float(v) => match v {
230                Ok(v) => write!(f, "{}", v),
231                Err(v) => write!(f, "{}", v),
232            },
233            LiteralKind::Str(v) => match v {
234                Ok(v) => write!(f, "{}", v),
235                Err(v) => write!(f, "{}", v),
236            },
237        }
238    }
239}
240
241/// Parsed token.
242#[derive(Debug, Clone, PartialEq)]
243pub struct Token {
244    /// The kind of Token.
245    pub kind: TokenKind,
246    /// The start location of Token.
247    pub start: Location,
248    /// The end location of Token.
249    pub end: Location,
250}
251
252impl Token {
253    /// Constructs a new `Token`.
254    pub fn new(kind: TokenKind, start: Location, end: Location) -> Self {
255        Token { kind, start, end }
256    }
257
258    /// Constructs a fake Token.
259    pub fn dummy() -> Self {
260        Token {
261            kind: TokenKind::Unknown(' '),
262            start: Location {
263                lineno: 1,
264                column: 1,
265                offset: 0,
266            },
267            end: Location {
268                lineno: 1,
269                column: 1,
270                offset: 0,
271            },
272        }
273    }
274}
275
276impl fmt::Display for Token {
277    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
278        write!(
279            f,
280            "Token {:20} start: {}, end: {})",
281            format!("{}", self.kind),
282            self.start,
283            self.end
284        )
285    }
286}
287
288/// Type of Token. Common Token, Idnet or Literal.
289#[derive(Debug, Clone, PartialEq)]
290pub enum TokenType {
291    /// Common Token
292    Token(TokenKind),
293    /// Ident
294    Ident,
295    /// Literal
296    Literal,
297}
298
299impl fmt::Display for TokenType {
300    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
301        match self {
302            Self::Token(t) => write!(f, "{}", t),
303            Self::Ident => write!(f, "Ident"),
304            Self::Literal => write!(f, "Literal"),
305        }
306    }
307}