go_parser/
token.rs

1// Copyright 2022 The Goscript Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4//
5//
// This code is adapted from the official Go code written in Go
7// with license as follows:
8// Copyright 2013 The Go Authors. All rights reserved.
9// Use of this source code is governed by a BSD-style
10// license that can be found in the LICENSE file.
11
12#![allow(non_camel_case_types)]
13use std::fmt;
14
/// Precedence value that `Token::precedence` returns for every token that is
/// not one of the binary operators it lists explicitly.
pub(crate) const LOWEST_PREC: usize = 0; // non-operators
16
17//pub(crate) const UNARY_PREC: usize = 6;
18//pub(crate) const HIGHEST_PREC: usize = 7;
19
/// A lexical token of Go source code.
///
/// Variants that carry a `TokenData` payload hold per-token data (for the
/// identifier and basic-literal variants this is the text that `Display`
/// prints); every other variant is fully described by its kind. The category
/// and fixed text of each variant are defined by `Token::token_property`.
#[derive(Hash, Eq, PartialEq, Clone)]
pub enum Token {
    // Special tokens
    NONE,
    // NOTE(review): payload presumably holds the offending source text — confirm against the scanner.
    ILLEGAL(TokenData),
    EOF,
    // NOTE(review): payload presumably holds the comment text — confirm against the scanner.
    COMMENT(TokenData),

    // Identifiers and basic type literals
    IDENT(TokenData),  // main
    INT(TokenData),    // 12345
    FLOAT(TokenData),  // 123.45
    IMAG(TokenData),   // 123.45i
    CHAR(TokenData),   // 'a'
    STRING(TokenData), // "abc"
    // Operators and delimiters
    ADD, // +
    SUB, // -
    MUL, // *
    QUO, // /
    REM, // %

    AND,     // &
    OR,      // |
    XOR,     // ^
    SHL,     // <<
    SHR,     // >>
    AND_NOT, // &^

    ADD_ASSIGN, // +=
    SUB_ASSIGN, // -=
    MUL_ASSIGN, // *=
    QUO_ASSIGN, // /=
    REM_ASSIGN, // %=

    AND_ASSIGN,     // &=
    OR_ASSIGN,      // |=
    XOR_ASSIGN,     // ^=
    SHL_ASSIGN,     // <<=
    SHR_ASSIGN,     // >>=
    AND_NOT_ASSIGN, // &^=

    LAND,  // &&
    LOR,   // ||
    ARROW, // <-
    INC,   // ++
    DEC,   // --

    EQL,    // ==
    LSS,    // <
    GTR,    // >
    ASSIGN, // =
    NOT,    // !

    NEQ,      // !=
    LEQ,      // <=
    GEQ,      // >=
    DEFINE,   // :=
    ELLIPSIS, // ...

    LPAREN, // (
    LBRACK, // [
    LBRACE, // {
    COMMA,  // ,
    PERIOD, // .

    RPAREN,               // )
    RBRACK,               // ]
    RBRACE,               // }
    SEMICOLON(TokenData), // ; payload is a bool: true if the SEMICOLON was NOT inserted by the scanner
    COLON,                // :

    // Keywords
    BREAK,
    CASE,
    CHAN,
    CONST,
    CONTINUE,

    DEFAULT,
    DEFER,
    ELSE,
    FALLTHROUGH,
    FOR,

    FUNC,
    GO,
    GOTO,
    IF,
    IMPORT,

    INTERFACE,
    MAP,
    PACKAGE,
    RANGE,
    RETURN,

    SELECT,
    STRUCT,
    SWITCH,
    TYPE,
    VAR,
}
123
/// Broad category of a token, as reported by `Token::token_property`.
///
/// The derives make the category printable in diagnostics, cheap to pass by
/// value, and directly comparable with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenType {
    /// Identifiers and basic literals (`IDENT`, `INT`, `FLOAT`, `IMAG`, `CHAR`, `STRING`).
    Literal,
    /// Operators and delimiters such as `+`, `:=`, `(` or `;`.
    Operator,
    /// Reserved words such as `func` or `return`.
    Keyword,
    /// Everything else: `NONE`, `ILLEGAL`, `EOF` and `COMMENT`.
    Other,
}
130
131impl Token {
132    pub fn token_property(&self) -> (TokenType, &str) {
133        match self {
134            Token::NONE => (TokenType::Other, "NONE"),
135            Token::ILLEGAL(_) => (TokenType::Other, "ILLEGAL"),
136            Token::EOF => (TokenType::Other, "EOF"),
137            Token::COMMENT(_) => (TokenType::Other, "COMMENT"),
138            Token::IDENT(_) => (TokenType::Literal, "IDENT"),
139            Token::INT(_) => (TokenType::Literal, "INT"),
140            Token::FLOAT(_) => (TokenType::Literal, "FLOAT"),
141            Token::IMAG(_) => (TokenType::Literal, "IMAG"),
142            Token::CHAR(_) => (TokenType::Literal, "CHAR"),
143            Token::STRING(_) => (TokenType::Literal, "STRING"),
144            Token::ADD => (TokenType::Operator, "+"),
145            Token::SUB => (TokenType::Operator, "-"),
146            Token::MUL => (TokenType::Operator, "*"),
147            Token::QUO => (TokenType::Operator, "/"),
148            Token::REM => (TokenType::Operator, "%"),
149            Token::AND => (TokenType::Operator, "&"),
150            Token::OR => (TokenType::Operator, "|"),
151            Token::XOR => (TokenType::Operator, "^"),
152            Token::SHL => (TokenType::Operator, "<<"),
153            Token::SHR => (TokenType::Operator, ">>"),
154            Token::AND_NOT => (TokenType::Operator, "&^"),
155            Token::ADD_ASSIGN => (TokenType::Operator, "+="),
156            Token::SUB_ASSIGN => (TokenType::Operator, "-="),
157            Token::MUL_ASSIGN => (TokenType::Operator, "*="),
158            Token::QUO_ASSIGN => (TokenType::Operator, "/="),
159            Token::REM_ASSIGN => (TokenType::Operator, "%="),
160            Token::AND_ASSIGN => (TokenType::Operator, "&="),
161            Token::OR_ASSIGN => (TokenType::Operator, "|="),
162            Token::XOR_ASSIGN => (TokenType::Operator, "^="),
163            Token::SHL_ASSIGN => (TokenType::Operator, "<<="),
164            Token::SHR_ASSIGN => (TokenType::Operator, ">>="),
165            Token::AND_NOT_ASSIGN => (TokenType::Operator, "&^="),
166            Token::LAND => (TokenType::Operator, "&&"),
167            Token::LOR => (TokenType::Operator, "||"),
168            Token::ARROW => (TokenType::Operator, "<-"),
169            Token::INC => (TokenType::Operator, "++"),
170            Token::DEC => (TokenType::Operator, "--"),
171            Token::EQL => (TokenType::Operator, "=="),
172            Token::LSS => (TokenType::Operator, "<"),
173            Token::GTR => (TokenType::Operator, ">"),
174            Token::ASSIGN => (TokenType::Operator, "="),
175            Token::NOT => (TokenType::Operator, "!"),
176            Token::NEQ => (TokenType::Operator, "!="),
177            Token::LEQ => (TokenType::Operator, "<="),
178            Token::GEQ => (TokenType::Operator, ">="),
179            Token::DEFINE => (TokenType::Operator, ":="),
180            Token::ELLIPSIS => (TokenType::Operator, "..."),
181            Token::LPAREN => (TokenType::Operator, "("),
182            Token::LBRACK => (TokenType::Operator, "["),
183            Token::LBRACE => (TokenType::Operator, "{"),
184            Token::COMMA => (TokenType::Operator, ","),
185            Token::PERIOD => (TokenType::Operator, "."),
186            Token::RPAREN => (TokenType::Operator, ")"),
187            Token::RBRACK => (TokenType::Operator, "]"),
188            Token::RBRACE => (TokenType::Operator, "}"),
189            Token::SEMICOLON(_) => (TokenType::Operator, ";"),
190            Token::COLON => (TokenType::Operator, ":"),
191            Token::BREAK => (TokenType::Keyword, "break"),
192            Token::CASE => (TokenType::Keyword, "case"),
193            Token::CHAN => (TokenType::Keyword, "chan"),
194            Token::CONST => (TokenType::Keyword, "const"),
195            Token::CONTINUE => (TokenType::Keyword, "continue"),
196            Token::DEFAULT => (TokenType::Keyword, "default"),
197            Token::DEFER => (TokenType::Keyword, "defer"),
198            Token::ELSE => (TokenType::Keyword, "else"),
199            Token::FALLTHROUGH => (TokenType::Keyword, "fallthrough"),
200            Token::FOR => (TokenType::Keyword, "for"),
201            Token::FUNC => (TokenType::Keyword, "func"),
202            Token::GO => (TokenType::Keyword, "go"),
203            Token::GOTO => (TokenType::Keyword, "goto"),
204            Token::IF => (TokenType::Keyword, "if"),
205            Token::IMPORT => (TokenType::Keyword, "import"),
206            Token::INTERFACE => (TokenType::Keyword, "interface"),
207            Token::MAP => (TokenType::Keyword, "map"),
208            Token::PACKAGE => (TokenType::Keyword, "package"),
209            Token::RANGE => (TokenType::Keyword, "range"),
210            Token::RETURN => (TokenType::Keyword, "return"),
211            Token::SELECT => (TokenType::Keyword, "select"),
212            Token::STRUCT => (TokenType::Keyword, "struct"),
213            Token::SWITCH => (TokenType::Keyword, "switch"),
214            Token::TYPE => (TokenType::Keyword, "type"),
215            Token::VAR => (TokenType::Keyword, "var"),
216        }
217    }
218
219    pub fn ident_token(ident: String) -> Token {
220        match ident.as_str() {
221            "break" => Token::BREAK,
222            "case" => Token::CASE,
223            "chan" => Token::CHAN,
224            "const" => Token::CONST,
225            "continue" => Token::CONTINUE,
226            "default" => Token::DEFAULT,
227            "defer" => Token::DEFER,
228            "else" => Token::ELSE,
229            "fallthrough" => Token::FALLTHROUGH,
230            "for" => Token::FOR,
231            "func" => Token::FUNC,
232            "go" => Token::GO,
233            "goto" => Token::GOTO,
234            "if" => Token::IF,
235            "import" => Token::IMPORT,
236            "interface" => Token::INTERFACE,
237            "map" => Token::MAP,
238            "package" => Token::PACKAGE,
239            "range" => Token::RANGE,
240            "return" => Token::RETURN,
241            "select" => Token::SELECT,
242            "struct" => Token::STRUCT,
243            "switch" => Token::SWITCH,
244            "type" => Token::TYPE,
245            "var" => Token::VAR,
246            _ => Token::IDENT(ident.into()),
247        }
248    }
249
250    pub fn int1() -> Token {
251        Token::INT("1".to_owned().into())
252    }
253
254    pub fn precedence(&self) -> usize {
255        match self {
256            Token::LOR => 1,
257            Token::LAND => 2,
258            Token::EQL | Token::NEQ | Token::LSS | Token::LEQ | Token::GTR | Token::GEQ => 3,
259            Token::ADD | Token::SUB | Token::OR | Token::XOR => 4,
260            Token::MUL
261            | Token::QUO
262            | Token::REM
263            | Token::SHL
264            | Token::SHR
265            | Token::AND
266            | Token::AND_NOT => 5,
267            _ => LOWEST_PREC,
268        }
269    }
270
271    pub fn text(&self) -> &str {
272        let (_, t) = self.token_property();
273        t
274    }
275
276    pub fn is_literal(&self) -> bool {
277        match self.token_property().0 {
278            TokenType::Literal => true,
279            _ => false,
280        }
281    }
282
283    pub fn is_operator(&self) -> bool {
284        match self.token_property().0 {
285            TokenType::Operator => true,
286            _ => false,
287        }
288    }
289
290    pub fn is_keyword(&self) -> bool {
291        match self.token_property().0 {
292            TokenType::Keyword => true,
293            _ => false,
294        }
295    }
296
297    pub fn get_literal(&self) -> &str {
298        match self {
299            Token::INT(l) => l.as_str(),
300            Token::FLOAT(l) => l.as_str(),
301            Token::IMAG(l) => l.as_str(),
302            Token::CHAR(l) => l.as_str(),
303            Token::STRING(l) => l.as_str(),
304            _ => "",
305        }
306    }
307
308    pub fn is_stmt_start(&self) -> bool {
309        match self {
310            Token::BREAK => true,
311            Token::CONST => true,
312            Token::CONTINUE => true,
313            Token::DEFER => true,
314            Token::FALLTHROUGH => true,
315            Token::FOR => true,
316            Token::GO => true,
317            Token::GOTO => true,
318            Token::IF => true,
319            Token::RETURN => true,
320            Token::SELECT => true,
321            Token::SWITCH => true,
322            Token::TYPE => true,
323            Token::VAR => true,
324            _ => false,
325        }
326    }
327
328    pub fn is_decl_start(&self) -> bool {
329        match self {
330            Token::CONST => true,
331            Token::TYPE => true,
332            Token::VAR => true,
333            _ => false,
334        }
335    }
336
337    pub fn is_expr_end(&self) -> bool {
338        match self {
339            Token::COMMA => true,
340            Token::COLON => true,
341            Token::SEMICOLON(_) => true,
342            Token::RPAREN => true,
343            Token::RBRACK => true,
344            Token::RBRACE => true,
345            _ => false,
346        }
347    }
348}
349
350impl fmt::Display for Token {
351    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
352        let text = self.text();
353        match self {
354            Token::IDENT(l)
355            | Token::INT(l)
356            | Token::FLOAT(l)
357            | Token::IMAG(l)
358            | Token::CHAR(l)
359            | Token::STRING(l) => f.write_str(l.as_str()),
360            _ => write!(f, "{}", text),
361        }
362    }
363}
364
365impl fmt::Debug for Token {
366    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
367        let text = self.text();
368        match self {
369            Token::IDENT(l)
370            | Token::INT(l)
371            | Token::FLOAT(l)
372            | Token::IMAG(l)
373            | Token::CHAR(l)
374            | Token::STRING(l) => write!(f, "{} {}", text, l.as_str()),
375            Token::SEMICOLON(real) if !*real.as_bool() => write!(f, "\"{}(inserted)\"", text),
376            token if token.is_operator() || token.is_keyword() => write!(f, "\"{}\"", text),
377            _ => write!(f, "{}", text),
378        }
379    }
380}
381
/// The concrete payload stored behind a `TokenData`.
#[derive(Hash, Eq, PartialEq, Clone, Debug)]
enum RawTokenData {
    /// A boolean flag (read back through `TokenData::as_bool`).
    Bool(bool),
    /// A single string.
    Str(String),
    /// Two strings; `TokenData::as_str` exposes the first one.
    // NOTE(review): presumably (raw source text, processed value) — confirm against the scanner.
    StrStr(String, String),
    /// A string plus a character; `TokenData::as_str` exposes the string.
    // NOTE(review): presumably (raw source text, decoded char) — confirm against the scanner.
    StrChar(String, char),
}
389
/// Payload attached to the data-carrying `Token` variants.
///
/// The value is stored boxed; build it through the `From` impls and read it
/// back through the `as_*` accessors, which panic when asked for a payload
/// shape other than the one stored.
// NOTE(review): the `Box` presumably keeps `Token` small — confirm.
#[derive(Hash, Eq, PartialEq, Clone, Debug)]
pub struct TokenData(Box<RawTokenData>);
392
393impl From<bool> for TokenData {
394    fn from(b: bool) -> Self {
395        TokenData(Box::new(RawTokenData::Bool(b)))
396    }
397}
398
399impl From<String> for TokenData {
400    fn from(s: String) -> Self {
401        TokenData(Box::new(RawTokenData::Str(s)))
402    }
403}
404
405impl From<(String, String)> for TokenData {
406    fn from(ss: (String, String)) -> Self {
407        TokenData(Box::new(RawTokenData::StrStr(ss.0, ss.1)))
408    }
409}
410
411impl From<(String, char)> for TokenData {
412    fn from(ss: (String, char)) -> Self {
413        TokenData(Box::new(RawTokenData::StrChar(ss.0, ss.1)))
414    }
415}
416
417impl AsRef<bool> for TokenData {
418    fn as_ref(&self) -> &bool {
419        self.as_bool()
420    }
421}
422
423impl AsRef<String> for TokenData {
424    fn as_ref(&self) -> &String {
425        self.as_str()
426    }
427}
428
429impl AsMut<String> for TokenData {
430    fn as_mut(&mut self) -> &mut String {
431        self.as_str_mut()
432    }
433}
434
435impl TokenData {
436    pub fn as_bool(&self) -> &bool {
437        match self.0.as_ref() {
438            RawTokenData::Bool(b) => b,
439            _ => unreachable!(),
440        }
441    }
442
443    pub fn as_str(&self) -> &String {
444        match self.0.as_ref() {
445            RawTokenData::Str(s) => s,
446            RawTokenData::StrStr(s, _) => s,
447            RawTokenData::StrChar(s, _) => s,
448            _ => unreachable!(),
449        }
450    }
451
452    pub fn as_str_mut(&mut self) -> &mut String {
453        match self.0.as_mut() {
454            RawTokenData::Str(s) => s,
455            RawTokenData::StrStr(s, _) => s,
456            RawTokenData::StrChar(s, _) => s,
457            _ => unreachable!(),
458        }
459    }
460
461    pub fn as_str_str(&self) -> (&String, &String) {
462        match self.0.as_ref() {
463            RawTokenData::StrStr(s1, s2) => (s1, s2),
464            _ => unreachable!(),
465        }
466    }
467
468    pub fn as_str_char(&self) -> (&String, &char) {
469        match self.0.as_ref() {
470            RawTokenData::StrChar(s, c) => (s, c),
471            _ => unreachable!(),
472        }
473    }
474}
475
#[cfg(test)]
mod test {
    use super::*;

    /// `Display` prints the payload for identifiers/literals and the fixed
    /// token text for everything else.
    #[test]
    fn token_test() {
        assert_eq!(
            Token::ILLEGAL("asd".to_owned().into()).to_string(),
            "ILLEGAL"
        );
        assert_eq!(Token::SWITCH.to_string(), "switch");
        assert_eq!(
            Token::IDENT("some_var".to_owned().into()).to_string(),
            "some_var"
        );
        assert_eq!(Token::FLOAT("3.14".to_owned().into()).to_string(), "3.14");
    }

    /// `Debug` quotes operators and keywords, marks inserted semicolons and
    /// prefixes identifier/literal payloads with the kind name.
    #[test]
    fn token_debug_test() {
        assert_eq!(format!("{:?}", Token::SWITCH), "\"switch\"");
        assert_eq!(format!("{:?}", Token::ADD), "\"+\"");
        assert_eq!(
            format!("{:?}", Token::IDENT("x".to_owned().into())),
            "IDENT x"
        );
        assert_eq!(
            format!("{:?}", Token::SEMICOLON(false.into())),
            "\";(inserted)\""
        );
        assert_eq!(format!("{:?}", Token::SEMICOLON(true.into())), "\";\"");
        assert_eq!(format!("{:?}", Token::EOF), "EOF");
    }

    /// Keywords map to their dedicated tokens; any other word becomes `IDENT`.
    #[test]
    fn ident_token_test() {
        assert_eq!(Token::ident_token("func".to_owned()), Token::FUNC);
        assert_eq!(
            Token::ident_token("funcs".to_owned()),
            Token::IDENT("funcs".to_owned().into())
        );
        assert!(Token::ident_token("return".to_owned()).is_keyword());
    }
}