foreach/
foreach.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
//! This is a [foreach](https://esolangs.org/wiki/Foreach) tokenizer.
//! This is intended to show how to use a slightly more complex example compared to brainfuck.
//!
//! Note: Foreach is an esolang so this tokenizer may be a bit strange.

#![allow(dead_code)]

use alkale::{token::Token, TokenizerContext, TokenizerResult};

/// Represents the foreach tokens.
///
/// `PartialEq`/`Eq` are derived so tokens can be compared directly
/// (useful for parsers and tests); this is a pure addition and does not
/// affect existing users of the type.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ForeachToken {
    /// Any run of identifier characters that is not a reserved symbol below.
    Identifier(String),
    OpenBracket,  // [
    CloseBracket, // ]
    OpenBrace,    // {
    CloseBrace,   // }
    Semicolon,    // ;
    Assign,       // =
    ConstAssign,  // :=
    Foreach,      // =>
    Return,       // ->
}

/// Tokenizes a string according to Foreach grammar.
///
/// Strategy: greedily read a maximal run of identifier characters; if none
/// is present, consume exactly one character and emit a single-character
/// token (or skip it). Runs starting with `//` are treated as line comments.
///
/// Returns the `TokenizerResult` accumulated by the `alkale` context.
fn tokenize(source: &str) -> TokenizerResult<ForeachToken> {
    use ForeachToken::*;

    // Create the reader context
    let mut context = TokenizerContext::new(source.chars());

    // Iterate as long as more characters exist in the tokenizer
    while context.has_next() {
        // Attempt to read an identifier.
        // Capacity 64 is a heuristic to avoid reallocation for typical names.
        let mut identifier = String::with_capacity(64);
        let identifier_span = context.read_into_while(&mut identifier, is_identifier_char);

        // If span is None, then 0 characters were read; i.e. there is no identifier.
        let Some(span) = identifier_span else {
            // Because there's no identifier here, push a single-character token, if there is one.
            // Consume a single character either way.
            // The unwrap is safe: `has_next()` was true and nothing was consumed above.
            let (char, span) = context.next_span().unwrap();

            let token = match char {
                '[' => OpenBracket,
                ']' => CloseBracket,
                '{' => OpenBrace,
                '}' => CloseBrace,
                ';' => Semicolon,
                _ => continue, // Any other character (e.g. whitespace) will just be ignored.
            };

            context.push_token(Token::new(token, span));
            continue;
        };

        // "//" will be matched as an identifier due to language rules
        // (`/` is a valid identifier char, so a run like "//foo" reads as one identifier).
        // If it's found, then skip until the next newline and continue.
        // Note: Something like "A//" passes this check, this is correct behavior.
        if identifier.starts_with("//") {
            context.skip_until('\n');
            continue;
        }

        // Create a token from the identifier. Some specific identifiers are their own tokens;
        // anything else becomes a generic `Identifier`.
        let token = match identifier.as_str() {
            "=" => Assign,
            ":=" => ConstAssign,
            "=>" => Foreach,
            "->" => Return,
            _ => Identifier(identifier),
        };

        // Push the token from above along with the identifier's span.
        context.push_token(Token::new(token, span));
    }

    // Return the result
    context.result()
}

/// Returns `true` if the input is a valid identifier char.
///
/// Valid identifier chars are any non-whitespace character that is not one
/// of the following delimiters: `;`, `[`, `]`, `{`, `}`.
///
/// Takes `&char` (rather than `char`) to match the predicate signature
/// expected by `TokenizerContext::read_into_while`.
fn is_identifier_char(x: &char) -> bool {
    // `matches!` replaces the original chain of `!=` comparisons; same logic.
    !x.is_whitespace() && !matches!(x, ';' | '[' | ']' | '{' | '}')
}

fn main() {
    // A sample Foreach program exercising comments, both assignment forms,
    // the foreach arrow (`=>`), and the return arrow (`->`).
    let source = r#"
    false :=   [];
    true  := [[]];

    // True -> False, False -> True
    ! inp { 
        v := inp => -> false; 
        -> true; 
    }

    // True if input array contains only truthy values.
        && inp {
        v := inp => _ := ! v => -> false;
        -> true;
    }

    // True if input array contains at least 1 truthy value.
    || inp v := inp => _ := v => -> true;

    // True if number of truthy values in input array is odd 
        ^ inp {
        out = false;
        v := inp => _ := v => out = ! out;
        -> out;
    }

    "#;

    // Tokenize the program and pretty-print the resulting token stream.
    println!("{:#?}", tokenize(source));
}