maolang_core/tokenizer/
keyword.rs

1//! Keyword definitions and global randomizer
2
3use std::collections::HashMap;
4
5use rand::{RngCore, seq::IndexedRandom};
6
/// Every keyword the language understands. The spelling of a keyword changes a lot, so each
/// variant is named after the "root" concept it represents rather than after any one spelling
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Keyword {
    /// Logical conjunction (`and`)
    And,
    /// Boolean `false`
    False,
    /// Boolean `true`
    True,
    /// Start of a `for` loop
    ForLoopInit,
    /// Start of a conditional (`if`)
    ConditionalCheck,
    /// Alternative branch of a conditional (`else`)
    ConditionalElse,
    /// The empty value (`nil`, `null`, `None`)
    EmptyValue,
    /// Logical disjunction (`or`)
    Or,
    /// Print to standard out
    Print,
    /// Variable declaration (`var`)
    VariableDeclaration,
    /// Start of a `while` loop
    WhileLoopInit,

    /// Block opener (`{`)
    OpenBrace,
    /// Block closer (`}`)
    CloseBrace,

    /// Assignment (`=`)
    Equal,
    /// Greater than (`>`)
    Greater,
    /// Greater than or equal (`>=`)
    GreaterEqual,
    /// Equality comparison (`==`)
    EqualEqual,
    /// Inequality comparison (`!=`)
    BangEqual,
    /// Logical negation (`!`)
    Bang,
    /// Less than (`<`)
    Less,
    /// Less than or equal (`<=`)
    LessEqual,
}
55
// ---- Word-like keywords ----

/// Spellings accepted for logical `and`
const AND_VARIANTS: &[&str] = &["and", "&&", r#"/\"#, "alongside"];
/// Spellings accepted for logical `or`
const OR_VARIANTS: &[&str] = &["or", "||", r#"\/"#];

/// Spellings accepted for boolean `true`
const TRUE_VARIANTS: &[&str] = &["true", "True", "TRUE", "correct", "yah", ":)"];
/// Spellings accepted for boolean `false`
const FALSE_VARIANTS: &[&str] = &["false", "False", "FALSE", "incorrect", "nah", ":("];

/// Spellings accepted for starting a `for` loop
const FOR_VARIANTS: &[&str] = &["for", "each"];
/// Spellings accepted for starting a conditional (`if`)
const IF_VARIANTS: &[&str] = &["if", "case", "check", "cond"];
/// Spellings accepted for the alternative branch (`else`)
const ELSE_VARIANTS: &[&str] = &["else", "then", "otherwise"];
/// Spellings accepted for the empty value (`nil`)
const EMPTY_VARIANTS: &[&str] = &["nil", "None", "null", "NULL", "undefined", "void"];

/// Spellings accepted for printing to standard out
const PRINT_VARIANTS: &[&str] = &[
    "print",
    "puts",
    "echo",
    "Console.WriteLine",
    "System.out.println",
    "println",
    "fmt.Println",
    "console.log",
    "say",
];

/// Spellings accepted for starting a `while` loop
const WHILE_VARIANTS: &[&str] = &["while", "during", "whilst", "until", "as_long_as"];

/// Spellings accepted for declaring a variable (`var`)
const VARIABLE_DECLARATION_VARIANTS: &[&str] = &["var", "let", "auto", "$", "val", "new"];

// ---- Block delimiters ----

/// Spellings accepted for opening a block (`{`)
const OPEN_BRACE_VARIANTS: &[&str] = &["{", ":"];
/// Spellings accepted for closing a block (`}`)
const CLOSE_BRACE_VARIANTS: &[&str] = &["}", "end"];

// ---- Assignment and comparison operators ----

/// Spellings accepted for assignment (`=`). Note that `equals` is also listed in
/// `EQUALEQUAL_VARIANTS`
const EQUAL_VARIANTS: &[&str] = &["=", "equals", "is"];
/// Spellings accepted for `>`
const GREATER_VARIANTS: &[&str] = &[">", "gt", "greater"];
/// Spellings accepted for `>=`
const GREATEREQUAL_VARIANTS: &[&str] = &[">=", "gte"];
/// Spellings accepted for `==`. Note that `equals` is also listed in `EQUAL_VARIANTS`
const EQUALEQUAL_VARIANTS: &[&str] = &["==", "equals"];
/// Spellings accepted for `!=`
const BANGEQUAL_VARIANTS: &[&str] = &["!=", "inequal"];
/// Spellings accepted for `!`
const BANG_VARIANTS: &[&str] = &["!", "not"];
/// Spellings accepted for `<`
const LESS_VARIANTS: &[&str] = &["<", "lt", "less"];
/// Spellings accepted for `<=`
const LESSEQUAL_VARIANTS: &[&str] = &["<=", "lte"];
115
116impl Keyword {
117    /// Gets all bindings to keyword variants
118    pub fn all() -> Vec<(Keyword, &'static [&'static str])> {
119        vec![
120            (Self::And, AND_VARIANTS),
121            (Self::Or, OR_VARIANTS),
122            (Self::True, TRUE_VARIANTS),
123            (Self::False, FALSE_VARIANTS),
124            (Self::ForLoopInit, FOR_VARIANTS),
125            (Self::ConditionalCheck, IF_VARIANTS),
126            (Self::ConditionalElse, ELSE_VARIANTS),
127            (Self::EmptyValue, EMPTY_VARIANTS),
128            (Self::Print, PRINT_VARIANTS),
129            (Self::WhileLoopInit, WHILE_VARIANTS),
130            (Self::VariableDeclaration, VARIABLE_DECLARATION_VARIANTS),
131            (Self::OpenBrace, OPEN_BRACE_VARIANTS),
132            (Self::CloseBrace, CLOSE_BRACE_VARIANTS),
133            (Self::Equal, EQUAL_VARIANTS),
134            (Self::Greater, GREATER_VARIANTS),
135            (Self::GreaterEqual, GREATEREQUAL_VARIANTS),
136            (Self::EqualEqual, EQUALEQUAL_VARIANTS),
137            (Self::BangEqual, BANGEQUAL_VARIANTS),
138            (Self::Bang, BANG_VARIANTS),
139            (Self::Less, LESS_VARIANTS),
140            (Self::LessEqual, LESSEQUAL_VARIANTS),
141        ]
142    }
143}
144
145/// Struct for maintaining "sanity" across random generation, upon initialization it holds the
146/// current runs definitions for what each keyword maps to. This keeps syntax consistent in a
147/// single run, so variable declaration isn't both `var` and `let` in the same runtime for example
148#[derive(Debug)]
149pub struct KeywordRandomizer {
150    /// The keyword context
151    ctx: HashMap<&'static str, Keyword>,
152}
153
154impl KeywordRandomizer {
155    /// Tries to parse a keyword from a stream
156    pub fn try_parse(&self, stream: &str, idx: usize, len: &mut usize) -> Option<Keyword> {
157        let characters: Vec<_> = stream.chars().collect();
158        let mut idx2 = idx;
159
160        while !characters[idx2].is_whitespace() && idx2 < characters.len() - 1 {
161            idx2 += 1
162        }
163
164        while idx <= idx2 {
165            let pot_kwrd = &stream[idx..=idx2];
166            if let Ok(keyword) = self.try_from_str(pot_kwrd) {
167                *len = idx2 - idx + 1;
168                return Some(keyword);
169            }
170
171            if idx2 == 0 {
172                break;
173            }
174
175            idx2 -= 1;
176        }
177
178        None
179    }
180
181    /// Seeds all keywords
182    pub fn seeded_start<RNG: RngCore>(rng: &mut RNG) -> Self {
183        let mut ctx = HashMap::new();
184
185        for (root, variants) in Keyword::all() {
186            // unwrap safety, variants are all non-empty
187            let selected = variants.choose(rng).unwrap();
188            ctx.insert(*selected, root);
189        }
190
191        Self { ctx }
192    }
193
194    /// Attempts to parse a string into a keyword, if the string is valid for a Keyword but not the
195    /// proper variant, the correct variant for this runtime is returned as an error
196    pub fn try_from_str(&self, from: &str) -> Result<Keyword, Option<&'static str>> {
197        if let Some(keyword) = self.ctx.get(from) {
198            Ok(*keyword)
199        } else {
200            for (root, variants) in Keyword::all() {
201                if variants.contains(&from) {
202                    if let Some(proper_variant) = self
203                        .ctx
204                        .iter()
205                        .find(|(_, kwrd)| **kwrd == root)
206                        .map(|(var, _)| *var)
207                    {
208                        return Err(Some(proper_variant));
209                    }
210                }
211            }
212
213            Err(None)
214        }
215    }
216}
217
#[cfg(test)]
mod tests {
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    use super::{Keyword, KeywordRandomizer};

    /// Builds the randomizer every test starts from, always seeded with 42
    fn seeded_randomizer() -> KeywordRandomizer {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        KeywordRandomizer::seeded_start(&mut rng)
    }

    /// The spelling selected for printing under this seed resolves to `Keyword::Print`
    #[test]
    fn print() {
        let randomizer = seeded_randomizer();

        assert_eq!(randomizer.try_from_str("fmt.Println"), Ok(Keyword::Print))
    }

    /// A valid spelling that is not the active one reports the active spelling as the error
    #[test]
    fn try_from_string() {
        let randomizer = seeded_randomizer();

        assert_eq!(randomizer.try_from_str("var"), Err(Some("$")))
    }

    /// Parsing prefers the longest match and reports how many characters it covered
    #[test]
    fn try_parse_bottom_up() {
        let mut randomizer = seeded_randomizer();
        randomizer.ctx.insert(">=", Keyword::GreaterEqual);

        // (stream, start index, expected keyword, expected length)
        let cases = [
            (">=", 0, Keyword::GreaterEqual, 2),
            (" >=", 1, Keyword::GreaterEqual, 2),
            (" > =", 1, Keyword::Greater, 1),
            (" > =", 3, Keyword::Equal, 1),
        ];

        let mut len = 0;
        for (stream, start, expected, expected_len) in cases {
            let parsed = randomizer.try_parse(stream, start, &mut len);
            assert_eq!(parsed.unwrap(), expected);
            assert_eq!(len, expected_len);
        }
    }
}