Skip to main content

mimium_lang/compiler/parser/
token.rs

//! Token definitions for the parser.
//! Each token stores its kind, its start position (byte offset), and its length in bytes.
3use std::fmt;
4
/// Token kinds - the type of a token, without any embedded data.
///
/// Every variant is a unit variant, so the enum is `Copy` and can be compared and hashed
/// cheaply. Literal kinds (`Float`, `Int`, `Str`) store no value: only position information
/// is kept alongside the kind, and the value is reconstructed from the source text.
///
/// Trailing comments on symbol variants give the spelling that the `Display` impl renders.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // Identifiers and literals
    Ident,          // Generic identifier
    IdentFunction,  // Function name in declaration
    IdentParameter, // Function parameter
    IdentVariable,  // Variable name (for future use)
    MacroExpand,    // displayed as "macro_expand"

    // Type keywords
    FloatType,   // displayed as "float"
    IntegerType, // displayed as "int"
    StringType,  // displayed as "string"
    StructType,  // displayed as "struct"

    // Literals - value can be reconstructed from source text and position
    Float, // displayed as "float_literal"
    Int,   // displayed as "int_literal"
    Str,   // displayed as "string_literal"

    // Operators
    OpSum,          // +
    OpMinus,        // -
    OpProduct,      // *
    OpDivide,       // /
    OpEqual,        // ==
    OpNotEqual,     // !=
    OpLessThan,     // <
    OpLessEqual,    // <=
    OpGreaterThan,  // >
    OpGreaterEqual, // >=
    OpModulo,       // %
    OpExponent,     // ^
    OpAt,           // @
    OpAnd,          // &&
    OpOr,           // ||
    OpPipe,         // |>
    OpPipeMacro,    // ||>
    OpUnknown,      // Other operators (displayed as "unknown_op")

    // Special literals
    SelfLit,    // displayed as "self"
    Now,        // displayed as "now"
    SampleRate, // displayed as "samplerate"

    // Punctuation
    Comma,       // ,
    Dot,         // .
    DoubleDot,   // ..
    Colon,       // :
    DoubleColon, // ::
    SemiColon,   // ;

    // Binding keywords and assignment
    Let,    // displayed as "let"
    LetRec, // displayed as "letrec"
    Assign, // =

    // Brackets and other delimiters
    ParenBegin,        // (
    ParenEnd,          // )
    ArrayBegin,        // [
    ArrayEnd,          // ]
    BlockBegin,        // {
    BlockEnd,          // }
    LambdaArgBeginEnd, // |
    BackQuote,         // `
    Dollar,            // $

    // Function and flow control
    Function,    // fn
    Macro,       // macro
    Arrow,       // ->
    LeftArrow,   // <-
    FatArrow,    // =>
    PlaceHolder, // _
    If,          // displayed as "if"
    Else,        // displayed as "else"
    Match,       // match

    // Directives
    Include,  // displayed as "include"
    Sharp,    // #
    StageKwd, // stage
    Main,     // main

    // Module keywords
    Mod,   // mod
    Use,   // use
    Pub,   // pub
    Type,  // type
    Alias, // alias
    Rec,   // rec

    // Trivia (whitespace and comments); these are the kinds `Token::is_trivia` accepts
    LineBreak,
    Whitespace,
    SingleLineComment,
    MultiLineComment,

    // Special
    Error, // Error token for recovery
    Eof,   // displayed as "eof"
}
111
112impl fmt::Display for TokenKind {
113    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
114        match self {
115            TokenKind::Ident => write!(f, "identifier"),
116            TokenKind::IdentFunction => write!(f, "identifier_function"),
117            TokenKind::IdentParameter => write!(f, "identifier_parameter"),
118            TokenKind::IdentVariable => write!(f, "identifier_variable"),
119            TokenKind::MacroExpand => write!(f, "macro_expand"),
120            TokenKind::FloatType => write!(f, "float"),
121            TokenKind::IntegerType => write!(f, "int"),
122            TokenKind::StringType => write!(f, "string"),
123            TokenKind::StructType => write!(f, "struct"),
124            TokenKind::Float => write!(f, "float_literal"),
125            TokenKind::Int => write!(f, "int_literal"),
126            TokenKind::Str => write!(f, "string_literal"),
127            TokenKind::OpSum => write!(f, "+"),
128            TokenKind::OpMinus => write!(f, "-"),
129            TokenKind::OpProduct => write!(f, "*"),
130            TokenKind::OpDivide => write!(f, "/"),
131            TokenKind::OpEqual => write!(f, "=="),
132            TokenKind::OpNotEqual => write!(f, "!="),
133            TokenKind::OpLessThan => write!(f, "<"),
134            TokenKind::OpLessEqual => write!(f, "<="),
135            TokenKind::OpGreaterThan => write!(f, ">"),
136            TokenKind::OpGreaterEqual => write!(f, ">="),
137            TokenKind::OpModulo => write!(f, "%"),
138            TokenKind::OpExponent => write!(f, "^"),
139            TokenKind::OpAt => write!(f, "@"),
140            TokenKind::OpAnd => write!(f, "&&"),
141            TokenKind::OpOr => write!(f, "||"),
142            TokenKind::OpPipe => write!(f, "|>"),
143            TokenKind::OpPipeMacro => write!(f, "||>"),
144            TokenKind::OpUnknown => write!(f, "unknown_op"),
145            TokenKind::SelfLit => write!(f, "self"),
146            TokenKind::Now => write!(f, "now"),
147            TokenKind::SampleRate => write!(f, "samplerate"),
148            TokenKind::Comma => write!(f, ","),
149            TokenKind::Dot => write!(f, "."),
150            TokenKind::DoubleDot => write!(f, ".."),
151            TokenKind::Colon => write!(f, ":"),
152            TokenKind::DoubleColon => write!(f, "::"),
153            TokenKind::SemiColon => write!(f, ";"),
154            TokenKind::Let => write!(f, "let"),
155            TokenKind::LetRec => write!(f, "letrec"),
156            TokenKind::Assign => write!(f, "="),
157            TokenKind::ParenBegin => write!(f, "("),
158            TokenKind::ParenEnd => write!(f, ")"),
159            TokenKind::ArrayBegin => write!(f, "["),
160            TokenKind::ArrayEnd => write!(f, "]"),
161            TokenKind::BlockBegin => write!(f, "{{"),
162            TokenKind::BlockEnd => write!(f, "}}"),
163            TokenKind::LambdaArgBeginEnd => write!(f, "|"),
164            TokenKind::Function => write!(f, "fn"),
165            TokenKind::Macro => write!(f, "macro"),
166            TokenKind::Arrow => write!(f, "->"),
167            TokenKind::LeftArrow => write!(f, "<-"),
168            TokenKind::FatArrow => write!(f, "=>"),
169            TokenKind::PlaceHolder => write!(f, "_"),
170            TokenKind::If => write!(f, "if"),
171            TokenKind::Else => write!(f, "else"),
172            TokenKind::Match => write!(f, "match"),
173            TokenKind::Include => write!(f, "include"),
174            TokenKind::LineBreak => write!(f, "linebreak"),
175            TokenKind::Whitespace => write!(f, "whitespace"),
176            TokenKind::SingleLineComment => write!(f, "single_line_comment"),
177            TokenKind::MultiLineComment => write!(f, "multi_line_comment"),
178            TokenKind::BackQuote => write!(f, "`"),
179            TokenKind::Dollar => write!(f, "$"),
180            TokenKind::StageKwd => write!(f, "stage"),
181            TokenKind::Main => write!(f, "main"),
182            TokenKind::Mod => write!(f, "mod"),
183            TokenKind::Use => write!(f, "use"),
184            TokenKind::Pub => write!(f, "pub"),
185            TokenKind::Type => write!(f, "type"),
186            TokenKind::Alias => write!(f, "alias"),
187            TokenKind::Rec => write!(f, "rec"),
188            TokenKind::Sharp => write!(f, "#"),
189            TokenKind::Error => write!(f, "error"),
190            TokenKind::Eof => write!(f, "eof"),
191        }
192    }
193}
194
/// A token with position information.
///
/// Only the kind and the byte span are stored; the token's text is not copied. It can be
/// retrieved from the source text using `start` and `length` (see `Token::text`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Token {
    /// The kind of this token.
    pub kind: TokenKind,
    /// Byte offset of the token's first byte in the source.
    pub start: usize,
    /// Length of the token in bytes.
    pub length: usize,
}
203
204impl Token {
205    pub fn new(kind: TokenKind, start: usize, length: usize) -> Self {
206        Self {
207            kind,
208            start,
209            length,
210        }
211    }
212
213    /// Get the end position of this token
214    pub fn end(&self) -> usize {
215        self.start + self.length
216    }
217
218    /// Get the text of this token from the source
219    pub fn text<'a>(&self, source: &'a str) -> &'a str {
220        &source[self.start..self.end()]
221    }
222
223    /// Check if this token is trivia (whitespace or comment)
224    pub fn is_trivia(&self) -> bool {
225        matches!(
226            self.kind,
227            TokenKind::LineBreak
228                | TokenKind::Whitespace
229                | TokenKind::SingleLineComment
230                | TokenKind::MultiLineComment
231        )
232    }
233
234    /// Check if this token is an error token
235    pub fn is_error(&self) -> bool {
236        self.kind == TokenKind::Error
237    }
238}
239
240impl fmt::Display for Token {
241    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
242        write!(f, "{}@{}:{}", self.kind, self.start, self.length)
243    }
244}