rpg_compiler/tokenizer/
tokenizer.rs

1use std::fmt::{Debug, Formatter};
2use lazy_static::lazy_static;
3use regex::Regex;
4use TokenType::*;
5use crate::compile_error;
6
7lazy_static! {
8    /// All token types and their regexes
9    static ref TOKEN_TYPES: [TokenRegex; 26] = [
10        TokenRegex { ttype: Char, regex: Regex::new(r"\A\bchar\b").unwrap() },
11        TokenRegex { ttype: Zombie, regex: Regex::new(r"\A\bzombie\b").unwrap() },
12        TokenRegex { ttype: Merchant, regex: Regex::new(r"\A\bmerchant\b").unwrap() },
13        TokenRegex { ttype: Potion, regex: Regex::new(r"\A\bpotion\b").unwrap() },
14        TokenRegex { ttype: SpellBook, regex: Regex::new(r"\A\bspellbook\b").unwrap() },
15        TokenRegex { ttype: End, regex: Regex::new(r"\A\bend\b").unwrap() },
16        TokenRegex { ttype: FnAttacks, regex: Regex::new(r"\A\battacks\b").unwrap() },
17        TokenRegex { ttype: FnShouts, regex: Regex::new(r"\A\bshouts\b").unwrap() },
18        TokenRegex { ttype: FnWhispers, regex: Regex::new(r"\A\bwhispers\b").unwrap() },
19        TokenRegex { ttype: FnBuys, regex: Regex::new(r"\A\bbuys\b").unwrap() },
20        TokenRegex { ttype: FnUses, regex: Regex::new(r"\A\buses\b").unwrap() },
21        TokenRegex { ttype: FnCasting, regex: Regex::new(r"\A\bcasting\b").unwrap() },
22        TokenRegex { ttype: SbFnSpeak, regex: Regex::new(r"\A\bspeak\b").unwrap() },
23        TokenRegex { ttype: SbFnUnZombify, regex: Regex::new(r"\A\bun_zombify\b").unwrap() },
24        TokenRegex { ttype: SbFnConfuse, regex: Regex::new(r"\A\bconfuse\b").unwrap() },
25        TokenRegex { ttype: SbFnGodSpeech, regex: Regex::new(r"\A\bgod_speech\b").unwrap() },
26        TokenRegex { ttype: SbFnTimeWarp, regex: Regex::new(r"\A\btime_warp\b").unwrap() },
27        TokenRegex { ttype: SbFnShift, regex: Regex::new(r"\A\bshift\b").unwrap() },
28        TokenRegex { ttype: SbFnCreatePotion, regex: Regex::new(r"\A\bcreate_potion\b").unwrap() },
29        TokenRegex { ttype: From, regex: Regex::new(r"\A\bfrom\b").unwrap() },
30        // Identifier also matches all of the above, which is why it should be below all of them
31        // This means that all of the above are reserved words
32        TokenRegex { ttype: Identifier, regex: Regex::new(r"\A\b[a-zA-Z_]\w*\b").unwrap() },
33        TokenRegex { ttype: Integer, regex: Regex::new(r"\A-?[0-9]+").unwrap() },
34        TokenRegex { ttype: Equals, regex: Regex::new(r"\A=").unwrap() },
35        TokenRegex { ttype: OParen, regex: Regex::new(r"\A\(").unwrap() },
36        TokenRegex { ttype: CParen, regex: Regex::new(r"\A\)").unwrap() },
37        TokenRegex { ttype: Comma, regex: Regex::new(r"\A,").unwrap() },
38    ];
39}
40
/// Turns a source string into a sequence of tokens.
pub struct Tokenizer<'a> {
    /// The part of the input that has not been tokenized yet.
    code: &'a str,
}
45
46impl<'a> Tokenizer<'a> {
47    pub fn new(code: &'a str) -> Tokenizer<'a> {
48        Self { code }
49    }
50    
51    pub fn tokenize(&mut self) -> Vec<Token> {
52        let mut tokens: Vec<Token> = Vec::new();
53        while !self.code.is_empty() {
54            tokens.push(self.tokenize_next());
55            self.code = self.code.trim();
56        }
57        tokens
58    }
59    
60    fn tokenize_next(&mut self) -> Token {
61        for token_type in TOKEN_TYPES.iter() {
62            // m = match
63            for m in token_type.regex.captures_iter(&self.code) {
64                // We will only have 1 match because of \A
65                if let Some(_match) = m.get(0) {
66                    let m = _match.as_str().to_string();
67                    self.code = self.code.strip_prefix(m.as_str()).expect("Unexpected error: could not strip match from code");
68                    return Token::new(token_type.ttype, m);
69                }
70            }
71        }
72        
73        // Have no matches
74        let first_token = self.code.split(|c| c == ' ').collect::<Vec<&str>>();
75        if let Some(first_token) = first_token.get(0) {
76            compile_error!("Unexpected token: found {}", first_token)
77        } else {
78            compile_error!("Expected token but got None")
79        }
80    }
81}
82
83/// Contains the regex for a token and its type
84#[derive(Debug)]
85pub struct TokenRegex {
86    /// Token type
87    pub ttype: TokenType,
88    /// A regex in perl-compatible syntax
89    pub regex: Regex
90}
91
92/// Represents a single token
93#[derive(Debug, Clone)]
94pub struct Token {
95    /// Token type
96    pub ttype: TokenType,
97    /// token value
98    pub value: String
99}
100
101impl Token {
102    pub fn new(t: TokenType, v: String) -> Self {
103        Self { ttype: t, value: v }
104    }
105}
106
/// The token types available in the RPG language
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum TokenType {
    // Types
    /// `char`
    Char,
    /// `zombie`
    Zombie,
    /// `merchant`
    Merchant,
    /// `potion`
    Potion,
    /// `spellbook`
    SpellBook,
    /// `end`
    End,
    // Functions
    /// `buys`
    FnBuys,
    /// `attacks`
    FnAttacks,
    /// `shouts`
    FnShouts,
    /// `whispers`
    FnWhispers,
    /// `uses`
    FnUses,
    /// `casting`
    FnCasting,
    // Spellbook functions
    /// `speak`
    SbFnSpeak,
    /// `un_zombify`
    SbFnUnZombify,
    /// `confuse`
    SbFnConfuse,
    /// `god_speech`
    SbFnGodSpeech,
    /// `time_warp`
    SbFnTimeWarp,
    /// `shift`
    SbFnShift,
    /// `create_potion`
    SbFnCreatePotion,
    // Other
    /// `from`
    From,
    // Names
    /// A name that is not one of the reserved words above
    Identifier,
    // Implicit types
    /// A signed integer
    Integer,
    // Punctuation
    /// `=`
    Equals,
    /// `(`
    OParen,
    /// `)`
    CParen,
    /// `,`
    Comma,
}
148
149impl Debug for TokenType {
150    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
151        write!(f, "{}", self.to_string())
152    }
153}
154
155impl ToString for TokenType {
156    fn to_string(&self) -> String {
157        match self {
158            Self::Char => "char".to_string(),
159            Self::Zombie => "zombie".to_string(),
160            Self::Merchant => "merchant".to_string(),
161            Self::Potion => "potion".to_string(),
162            Self::SpellBook => "spellbook".to_string(),
163            Self::End => "end".to_string(),
164            Self::FnAttacks => "attacks".to_string(),
165            Self::FnShouts => "shouts".to_string(),
166            Self::FnWhispers => "whispers".to_string(),
167            Self::FnBuys => "buys".to_string(),
168            Self::FnUses => "uses".to_string(),
169            Self::FnCasting => "casting".to_string(),
170            Self::SbFnSpeak => "speak()".to_string(),
171            Self::SbFnUnZombify => "un_zombify()".to_string(),
172            Self::SbFnConfuse => "confuse()".to_string(),
173            Self::SbFnGodSpeech => "god_speech()".to_string(),
174            Self::SbFnTimeWarp => "time_warp()".to_string(),
175            Self::SbFnShift => "shift()".to_string(),
176            Self::SbFnCreatePotion => "create_potion()".to_string(),
177            Self::From => "from".to_string(),
178            Self::Identifier => "identifier".to_string(),
179            Self::Integer => "integer".to_string(),
180            Self::Equals => "'='".to_string(),
181            Self::OParen => "'('".to_string(),
182            Self::CParen => "')'".to_string(),
183            Self::Comma => "','".to_string(),
184        }
185    }
186}
187
188#[allow(unused)]
189impl TokenType {
190    fn formatted(&self) -> String {
191        match self {
192            Self::Identifier => { format!("an {}", self.to_string()) }
193            Self::Integer => { format!("an {}", self.to_string()) }
194            _ => self.to_string()
195        }
196    }
197}