foreach/
foreach.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
//! This is a [foreach](https://esolangs.org/wiki/Foreach) tokenizer.
//! This is intended to show how to use a slightly more complex example compared to brainfuck.
//!
//! Note: Foreach is an esolang so this tokenizer may be a bit strange.

#![allow(dead_code)]

use alkale::{token::Token, TokenizerContext, TokenizerResult};

/// Represents the foreach tokens.
///
/// `PartialEq`/`Eq` are derived so tokens can be compared directly
/// (useful for parsers and tests); this is a pure addition and does not
/// affect existing users of the type.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ForeachToken {
    /// Any run of identifier characters that is not a reserved symbol below.
    Identifier(String),
    OpenBracket,  // [
    CloseBracket, // ]
    OpenBrace,    // {
    CloseBrace,   // }
    Semicolon,    // ;
    Assign,       // =
    ConstAssign,  // :=
    Foreach,      // =>
    Return,       // ->
}

/// Tokenizes a string according to Foreach grammar.
///
/// Strategy: greedily read a maximal run of identifier characters; if none
/// is present, consume exactly one character and emit a single-character
/// token (or skip it). Runs starting with `//` are treated as line comments.
///
/// Returns the `TokenizerResult` accumulated by the `alkale` context.
fn tokenize(source: &str) -> TokenizerResult<ForeachToken> {
    use ForeachToken::*;

    // Create the reader context
    let mut context = TokenizerContext::new(source.chars());

    // Iterate as long as more characters exist in the tokenizer
    while context.has_next() {
        // Attempt to read an identifier.
        // Capacity 64 is a heuristic to avoid reallocation for typical names.
        let mut identifier = String::with_capacity(64);
        let identifier_span = context.read_into_while(&mut identifier, is_identifier_char);

        // If span is None, then 0 characters were read; i.e. there is no identifier.
        let Some(span) = identifier_span else {
            // Because there's no identifier here, push a single-character token, if there is one.
            // Consume a single character either way.
            // The unwrap is safe: `has_next()` was true and nothing was consumed above.
            let (char, span) = context.next_span().unwrap();

            let token = match char {
                '[' => OpenBracket,
                ']' => CloseBracket,
                '{' => OpenBrace,
                '}' => CloseBrace,
                ';' => Semicolon,
                _ => continue, // Any other character (e.g. whitespace) will just be ignored.
            };

            context.push_token(Token::new(token, span));
            continue;
        };

        // "//" will be matched as an identifier due to language rules
        // (`/` is a valid identifier char, so a run like "//foo" reads as one identifier).
        // If it's found, then skip until the next newline and continue.
        // Note: Something like "A//" passes this check, this is correct behavior.
        if identifier.starts_with("//") {
            context.skip_until('\n');
            continue;
        }

        // Create a token from the identifier. Some specific identifiers are their own tokens;
        // anything else becomes a generic `Identifier`.
        let token = match identifier.as_str() {
            "=" => Assign,
            ":=" => ConstAssign,
            "=>" => Foreach,
            "->" => Return,
            _ => Identifier(identifier),
        };

        // Push the token from above along with the identifier's span.
        context.push_token(Token::new(token, span));
    }

    // Return the result
    context.result()
}

/// Returns `true` if the input is a valid identifier char.
///
/// Valid identifier chars are any non-whitespace character that is not one
/// of the following delimiters: `;`, `[`, `]`, `{`, `}`.
///
/// Takes `&char` (rather than `char`) to match the predicate signature
/// expected by `TokenizerContext::read_into_while`.
fn is_identifier_char(x: &char) -> bool {
    // `matches!` replaces the original chain of `!=` comparisons; same logic.
    !x.is_whitespace() && !matches!(x, ';' | '[' | ']' | '{' | '}')
}

fn main() {
    // A sample Foreach program exercising comments, both assignment forms,
    // the foreach arrow (`=>`), and the return arrow (`->`).
    let source = r#"
    false :=   [];
    true  := [[]];

    // True -> False, False -> True
    ! inp { 
        v := inp => -> false; 
        -> true; 
    }

    // True if input array contains only truthy values.
        && inp {
        v := inp => _ := ! v => -> false;
        -> true;
    }

    // True if input array contains at least 1 truthy value.
    || inp v := inp => _ := v => -> true;

    // True if number of truthy values in input array is odd 
        ^ inp {
        out = false;
        v := inp => _ := v => out = ! out;
        -> out;
    }

    "#;

    // Tokenize the program and pretty-print the resulting token stream.
    println!("{:#?}", tokenize(source));
}