//! Yul lexer tokens (`yul_parser/token.rs`).

1use logos::Logos;
2use num_derive::{FromPrimitive, ToPrimitive};
3use serde::{Deserialize, Serialize};
4
5#[derive(
6    Clone, Copy, PartialEq, Logos, Debug, FromPrimitive, ToPrimitive, Serialize, Deserialize,
7)]
8pub enum Token {
9    // Keywords
10    #[token("object")]
11    Object,
12    #[token("code")]
13    Code,
14    #[token("data")]
15    Data,
16    #[token("function")]
17    Function,
18    #[token("let")]
19    Let,
20    #[token("if")]
21    If,
22    #[token("switch")]
23    Switch,
24    #[token("case")]
25    Case,
26    #[token("default")]
27    Default,
28    #[token("for")]
29    For,
30    #[token("break")]
31    Break,
32    #[token("continue")]
33    Continue,
34    #[token("leave")]
35    Leave,
36
37    // Syntax
38    #[token("(")]
39    ParenOpen,
40    #[token(")")]
41    ParenClose,
42    #[token(r"{")]
43    BraceOpen,
44    #[token(r"}")]
45    BraceClose,
46    #[token(",")]
47    Comma,
48    #[token(":=")]
49    Assign,
50    #[token("->")]
51    Returns,
52    #[token(":")]
53    Typed,
54
55    // Identifiers
56    #[regex(r"[a-zA-Z_$][a-zA-Z_$0-9.]*")]
57    Identifier,
58
59    // Literals
60    #[token("true")]
61    LiteralTrue,
62    #[token("false")]
63    LiteralFalse,
64    #[regex(r"0x[0-9a-fA-F]+")]
65    LiteralHex,
66    #[regex(r"[0-9]+")]
67    LiteralInt,
68
69    #[regex(r#""([^"\r\n\\]|\\.)*""#)]
70    LiteralString,
71    #[regex(r#"hex"([0-9a-fA-F][0-9a-fA-F])*""#)]
72    LiteralStringHex,
73
74    // Ignored syntax
75    #[regex(r#"//[^\n]*"#)]
76    LineComment,
77    // TODO: Allow * in block comment when not followed by /
78    // See <https://stackoverflow.com/questions/16160190/regular-expression-to-find-c-style-block-comments>
79    #[regex(r#"/\*[^*]*\*/"#)]
80    BlockComment,
81    #[regex(r"[ \t\n\f]+")]
82    Whitespace,
83
84    // Logos requires one token variant to handle errors,
85    // it can be named anything you wish.
86    #[error]
87    Error,
88}
89
90impl Token {
91    /// Significant tokens are not whitespace, comments, etc.
92    pub fn is_significant(&self) -> bool {
93        use Token::*;
94        match self {
95            LineComment | BlockComment | Whitespace => false,
96            _ => true,
97        }
98    }
99
100    /// Token represents a lexer error
101    pub fn is_error(&self) -> bool {
102        use Token::*;
103        match self {
104            Error => true,
105            _ => false,
106        }
107    }
108}
109
110#[cfg(test)]
111mod tests {
112    use super::*;
113
114    #[test]
115    fn lex_example() {
116        let example = include_str!("../test/example.yul");
117        let tokens = Token::lexer(example).collect::<Vec<_>>();
118        let expected: Vec<Token> =
119            serde_json::from_str(include_str!("../test/example.tokens.json")).unwrap();
120        assert_eq!(tokens, expected);
121    }
122
123    #[test]
124    fn lex_erc20() {
125        let example = include_str!("../test/erc20.yul");
126        let tokens = Token::lexer(example).collect::<Vec<_>>();
127        let expected: Vec<Token> =
128            serde_json::from_str(include_str!("../test/erc20.tokens.json")).unwrap();
129        assert_eq!(tokens, expected);
130    }
131}