maolang_core/tokenizer/
keyword.rs

1//! Keyword definitions and global randomizer
2
3use std::collections::HashMap;
4
5use rand::{RngCore, seq::IndexedRandom};
6
/// All valid keywords.
///
/// The concrete spelling of a keyword changes a lot, so each variant names the "root" meaning
/// it represents rather than any one spelling.
///
/// The enum is a plain fieldless `Copy` type, so it also derives `Eq` and `Hash` and can be used
/// directly as a key in hashed collections.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Keyword {
    /// Logical conjunction (`and`)
    And,
    /// Boolean literal `false`
    False,
    /// Boolean literal `true`
    True,
    /// Start of a `for` loop
    ForLoopInit,
    /// Start of a conditional (`if`)
    ConditionalCheck,
    /// Alternative branch of a conditional (`else`)
    ConditionalElse,
    /// The empty value (`nil`, `null`, `None`)
    EmptyValue,
    /// Logical disjunction (`or`)
    Or,
    /// Print to standard out
    Print,
    /// Variable declaration (`var`)
    VariableDeclaration,
    /// Start of a `while` loop
    WhileLoopInit,

    /// Block opener (`{`)
    OpenBrace,
    /// Block closer (`}`)
    CloseBrace,

    /// Assignment (`=`)
    Equal,
    /// Greater-than comparison (`>`)
    Greater,
    /// Greater-than-or-equal comparison (`>=`)
    GreaterEqual,
    /// Equality comparison (`==`)
    EqualEqual,
    /// Inequality comparison (`!=`)
    BangEqual,
    /// Logical negation (`!`)
    Bang,
    /// Less-than comparison (`<`)
    Less,
    /// Less-than-or-equal comparison (`<=`)
    LessEqual,

    /// Function declaration
    FuncDec,
}
58
/// All valid assignment (`=`) variants, bound to `Keyword::Equal`
const EQUAL_VARIANTS: &[&str] = &["=", "is"];
/// All valid `>` variants, bound to `Keyword::Greater`
const GREATER_VARIANTS: &[&str] = &[">", "gt", "greater"];
/// All valid `>=` variants, bound to `Keyword::GreaterEqual`
const GREATEREQUAL_VARIANTS: &[&str] = &[">=", "gte"];
/// All valid `==` variants, bound to `Keyword::EqualEqual`
const EQUALEQUAL_VARIANTS: &[&str] = &["==", "equals"];
/// All valid `!=` variants, bound to `Keyword::BangEqual`
const BANGEQUAL_VARIANTS: &[&str] = &["!=", "inequal"];
/// All valid `!` variants, bound to `Keyword::Bang`
const BANG_VARIANTS: &[&str] = &["!", "not"];
/// All valid `<` variants, bound to `Keyword::Less`
const LESS_VARIANTS: &[&str] = &["<", "lt", "less"];
/// All valid `<=` variants, bound to `Keyword::LessEqual`
const LESSEQUAL_VARIANTS: &[&str] = &["<=", "lte"];

/// All valid `{` variants, bound to `Keyword::OpenBrace`
const OPEN_BRACE_VARIANTS: &[&str] = &["{", ":"];
/// All valid `}` variants, bound to `Keyword::CloseBrace`
const CLOSE_BRACE_VARIANTS: &[&str] = &["end", "}"];

/// All valid `and` variants, bound to `Keyword::And`
const AND_VARIANTS: &[&str] = &["and", "&&", r#"/\"#, "alongside"];
/// All valid `or` variants, bound to `Keyword::Or`
const OR_VARIANTS: &[&str] = &["or", "||", r#"\/"#];

/// All valid `false` variants, bound to `Keyword::False`
const FALSE_VARIANTS: &[&str] = &["false", "incorrect", "nah", ":("];
/// All valid `true` variants, bound to `Keyword::True`
const TRUE_VARIANTS: &[&str] = &["true", "correct", "yah", ":)"];

/// All valid `for` variants, bound to `Keyword::ForLoopInit`
const FOR_VARIANTS: &[&str] = &["for", "each"];
/// All valid `if` variants, bound to `Keyword::ConditionalCheck`
const IF_VARIANTS: &[&str] = &["if", "case", "check"];
/// All valid `else` variants, bound to `Keyword::ConditionalElse`
const ELSE_VARIANTS: &[&str] = &["else", "then", "otherwise"];
/// All valid `nil` variants, bound to `Keyword::EmptyValue`
const EMPTY_VARIANTS: &[&str] = &["nil", "None", "null", "undefined", "void"];

/// All valid printing variants, bound to `Keyword::Print`
const PRINT_VARIANTS: &[&str] = &[
    "print",
    "puts",
    "echo",
    "Console.WriteLine",
    "System.out.println",
    "console.log",
    "println",
    "fmt.Println",
    "say",
];

/// All valid `while` variants, bound to `Keyword::WhileLoopInit`
const WHILE_VARIANTS: &[&str] = &["while", "during", "whilst"];

/// All valid `var` variants, bound to `Keyword::VariableDeclaration`
const VARIABLE_DECLARATION_VARIANTS: &[&str] = &["var", "let", "auto", "$", "val", "new"];

/// All valid `function` variants, bound to `Keyword::FuncDec`
const FUNCTION_DECLARATION_VARIANTS: &[&str] = &["fn", "func", "def", "function"];
121
122impl Keyword {
123    /// Gets all bindings to keyword variants
124    pub fn all() -> Vec<(Keyword, &'static [&'static str])> {
125        vec![
126            (Self::And, AND_VARIANTS),
127            (Self::Or, OR_VARIANTS),
128            (Self::True, TRUE_VARIANTS),
129            (Self::False, FALSE_VARIANTS),
130            (Self::ForLoopInit, FOR_VARIANTS),
131            (Self::ConditionalCheck, IF_VARIANTS),
132            (Self::ConditionalElse, ELSE_VARIANTS),
133            (Self::EmptyValue, EMPTY_VARIANTS),
134            (Self::Print, PRINT_VARIANTS),
135            (Self::WhileLoopInit, WHILE_VARIANTS),
136            (Self::VariableDeclaration, VARIABLE_DECLARATION_VARIANTS),
137            (Self::OpenBrace, OPEN_BRACE_VARIANTS),
138            (Self::CloseBrace, CLOSE_BRACE_VARIANTS),
139            (Self::Equal, EQUAL_VARIANTS),
140            (Self::Greater, GREATER_VARIANTS),
141            (Self::GreaterEqual, GREATEREQUAL_VARIANTS),
142            (Self::EqualEqual, EQUALEQUAL_VARIANTS),
143            (Self::BangEqual, BANGEQUAL_VARIANTS),
144            (Self::Bang, BANG_VARIANTS),
145            (Self::Less, LESS_VARIANTS),
146            (Self::LessEqual, LESSEQUAL_VARIANTS),
147            (Self::FuncDec, FUNCTION_DECLARATION_VARIANTS),
148        ]
149    }
150}
151
/// Struct for maintaining "sanity" across random generation: upon initialization it holds the
/// current run's definitions for what each keyword maps to. This keeps syntax consistent within
/// a single run, so variable declaration isn't both `var` and `let` in the same runtime, for
/// example.
#[derive(Debug)]
pub struct KeywordRandomizer {
    /// The keyword context: maps each spelling that is active for this run to the keyword root
    /// it stands for
    ctx: HashMap<&'static str, Keyword>,
}
160
161impl KeywordRandomizer {
162    /// Tries to parse a keyword from a stream
163    pub fn try_parse(
164        &self,
165        stream: &str,
166        idx: usize,
167        len: &mut usize,
168    ) -> Result<Keyword, Option<&'static str>> {
169        let characters: Vec<_> = stream.chars().collect();
170        let mut idx2 = idx;
171
172        while !characters[idx2].is_whitespace() && idx2 < characters.len() - 1 {
173            idx2 += 1
174        }
175
176        while idx <= idx2 {
177            let pot_kwrd = &stream[idx..=idx2];
178            if let Ok(keyword) = self.try_from_str(pot_kwrd) {
179                *len = idx2 - idx + 1;
180                return Ok(keyword);
181            } else {
182                for (root, variants) in Keyword::all() {
183                    if variants.contains(&pot_kwrd) {
184                        if let Some(proper_variant) = self
185                            .ctx
186                            .iter()
187                            .find(|(_, kwrd)| **kwrd == root)
188                            .map(|(var, _)| *var)
189                        {
190                            *len = idx2 - idx + 1;
191                            return Err(Some(proper_variant));
192                        }
193                    }
194                }
195            }
196
197            if idx2 == 0 {
198                break;
199            }
200
201            idx2 -= 1;
202        }
203
204        Err(None)
205    }
206
207    /// Seeds all keywords
208    pub fn seeded_start<RNG: RngCore>(rng: &mut RNG) -> Self {
209        let mut ctx = HashMap::new();
210
211        for (root, variants) in Keyword::all() {
212            // unwrap safety, variants are all non-empty
213            let selected = variants.choose(rng).unwrap();
214            ctx.insert(*selected, root);
215        }
216
217        Self { ctx }
218    }
219
220    /// Attempts to parse a string into a keyword, if the string is valid for a Keyword but not the
221    /// proper variant, the correct variant for this runtime is returned as an error
222    pub fn try_from_str(&self, from: &str) -> Result<Keyword, Option<&'static str>> {
223        if let Some(keyword) = self.ctx.get(from) {
224            Ok(*keyword)
225        } else {
226            for (root, variants) in Keyword::all() {
227                if variants.contains(&from) {
228                    if let Some(proper_variant) = self
229                        .ctx
230                        .iter()
231                        .find(|(_, kwrd)| **kwrd == root)
232                        .map(|(var, _)| *var)
233                    {
234                        return Err(Some(proper_variant));
235                    }
236                }
237            }
238
239            Err(None)
240        }
241    }
242}
243
/// Unit tests for [`KeywordRandomizer`]. Every test seeds the RNG with the same fixed value, so
/// the expected spellings below are tied to that seed.
#[cfg(test)]
mod tests {
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    use crate::tokenizer::keyword::Keyword;

    use super::KeywordRandomizer;

    /// `println` is expected to be the active print spelling for seed 42.
    #[test]
    fn print() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let keywords = KeywordRandomizer::seeded_start(&mut rng);

        let attempt = keywords.try_from_str("println");

        assert_eq!(attempt.unwrap(), Keyword::Print)
    }

    /// A valid but inactive spelling (`var`) must be rejected with the active one (`$`).
    #[test]
    fn try_from_string() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let keywords = KeywordRandomizer::seeded_start(&mut rng);

        let attempt = keywords.try_from_str("var");

        assert_eq!(attempt.unwrap_err(), Some("$"))
    }

    /// `try_parse` must prefer the longest match and report its length through `len`.
    #[test]
    fn try_parse_bottom_up() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let mut keywords = KeywordRandomizer::seeded_start(&mut rng);
        let mut len = 0;
        // Register `>=` by hand so the test does not depend on which spelling the seed chose.
        keywords.ctx.insert(">=", Keyword::GreaterEqual);

        // Whole stream is the keyword.
        let attempt = keywords.try_parse(">=", 0, &mut len);
        assert_eq!(attempt.unwrap(), Keyword::GreaterEqual);
        assert_eq!(len, 2);

        // Keyword starts after leading whitespace.
        let attempt = keywords.try_parse(" >=", 1, &mut len);
        assert_eq!(attempt.unwrap(), Keyword::GreaterEqual);
        assert_eq!(len, 2);

        // Whitespace between `>` and `=` splits them into two separate keywords.
        let attempt = keywords.try_parse(" > =", 1, &mut len);
        assert_eq!(attempt.unwrap(), Keyword::Greater);
        assert_eq!(len, 1);

        // Keyword is the last character of the stream.
        let attempt = keywords.try_parse(" > =", 3, &mut len);
        assert_eq!(attempt.unwrap(), Keyword::Equal);
        assert_eq!(len, 1);
    }
}