//! This is a [foreach](https://esolangs.org/wiki/Foreach) tokenizer.
//! It is intended to show a slightly more complex example than the brainfuck tokenizer.
//!
//! Note: Foreach is an esolang so this tokenizer may be a bit strange.
#![allow(dead_code)]
use alkale::{token::Token, TokenizerContext, TokenizerResult};
/// Represents the foreach tokens
#[derive(Debug, Clone)]
enum ForeachToken {
    Identifier(String),
    OpenBracket,  // [
    CloseBracket, // ]
    OpenBrace,    // {
    CloseBrace,   // }
    Semicolon,    // ;
    Assign,       // =
    ConstAssign,  // :=
    Foreach,      // =>
    Return,       // ->
}
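// Note (added for clarity): the operator-like tokens (`=`, `:=`, `=>`, `->`) are built
// entirely from identifier characters, so `tokenize` below first reads them as
// whitespace-delimited identifiers and then maps the known spellings to these variants.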
/// Tokenizes a string according to Foreach grammar.
fn tokenize(source: &str) -> TokenizerResult<ForeachToken> {
    use ForeachToken::*;

    // Create the reader context
    let mut context = TokenizerContext::new(source.chars());

    // Iterate as long as more characters exist in the tokenizer
    while context.has_next() {
        // Attempt to read an identifier.
        let mut identifier = String::with_capacity(64);
        let identifier_span = context.read_into_while(&mut identifier, is_identifier_char);

        // If span is None, then 0 characters were read; i.e. there is no identifier.
        let Some(span) = identifier_span else {
            // Because there's no identifier here, push a single-character token, if there is one.
            // Consume a single character either way.
            let (char, span) = context.next_span().unwrap();
            let token = match char {
                '[' => OpenBracket,
                ']' => CloseBracket,
                '{' => OpenBrace,
                '}' => CloseBrace,
                ';' => Semicolon,
                _ => continue, // Any other character will just be ignored.
            };
            context.push_token(Token::new(token, span));
            continue;
        };
        // "//" will be matched as an identifier due to language rules.
        // If it's found, skip until the next newline and continue.
        // Note: something like "A//" does not start with "//", so it is not treated as
        // a comment; this is the correct behavior.
        if identifier.starts_with("//") {
            context.skip_until('\n');
            continue;
        }

        // Create a token from the identifier. Some specific identifiers are their own tokens.
        let token = match identifier.as_str() {
            "=" => Assign,
            ":=" => ConstAssign,
            "=>" => Foreach,
            "->" => Return,
            _ => Identifier(identifier),
        };

        // Push the token from above along with the identifier's span.
        context.push_token(Token::new(token, span));
    }

    // Return the result
    context.result()
}
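// Worked example (added for illustration, traced from the loop above): the snippet
// `x := []; // note` yields the tokens
//     Identifier("x"), ConstAssign, OpenBracket, CloseBracket, Semicolon
// because "x" and ":=" are read as whitespace-delimited identifiers, the brackets and
// the semicolon contain no identifier characters and are consumed one at a time, and
// the trailing "//" identifier makes the tokenizer skip the rest of the line.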
/// Returns true if the input is a valid identifier char.
/// Valid identifier chars are any non-whitespace character that isn't one of the following: `;{}[]`.
fn is_identifier_char(x: &char) -> bool {
    !x.is_whitespace() && *x != ';' && *x != '[' && *x != ']' && *x != '{' && *x != '}'
}
fn main() {
    let program = r#"
false := [];
true := [[]];

// True -> False, False -> True
! inp {
    v := inp => -> false;
    -> true;
}

// True if input array contains only truthy values.
&& inp {
    v := inp => _ := ! v => -> false;
    -> true;
}

// True if input array contains at least 1 truthy value.
|| inp v := inp => _ := v => -> true;

// True if number of truthy values in input array is odd
^ inp {
    out = false;
    v := inp => _ := v => out = ! out;
    -> out;
}
"#;

    let result = tokenize(program);
    println!("{:#?}", result);
}
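// A small test module added for illustration; it is not part of the original example
// and only exercises items defined in this file. The smoke test assumes the
// `TokenizerResult` returned by `tokenize` is debug-printable, which `main` above
// already relies on when printing with `{:#?}`.
#[cfg(test)]
mod tests {
    use super::{is_identifier_char, tokenize};

    #[test]
    fn identifier_chars() {
        // Letters, digits and operator characters all count as identifier characters.
        assert!(is_identifier_char(&'a'));
        assert!(is_identifier_char(&'0'));
        assert!(is_identifier_char(&'='));
        assert!(is_identifier_char(&'/'));
        // Whitespace and the reserved single-character tokens do not.
        assert!(!is_identifier_char(&' '));
        assert!(!is_identifier_char(&'\n'));
        for c in [';', '[', ']', '{', '}'] {
            assert!(!is_identifier_char(&c));
        }
    }

    #[test]
    fn tokenizes_small_snippet() {
        // Smoke test: tokenizing a one-line snippet should run to completion.
        let result = tokenize("true := [[]]; // comment\n-> true;");
        assert!(!format!("{:?}", result).is_empty());
    }
}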