//! Example from the documentation of alkale 2.0.0, a simple LL(1) lexer
//! library for Rust.
//!
//! This is a [foreach](https://esolangs.org/wiki/Foreach) lexer.
//! It is intended to show how to handle a slightly more complex language than brainfuck.
//!
//! Note: Foreach is an esolang so this lexer may be a bit strange.

#![allow(dead_code)]

use alkale::{span::Spanned, token::Token, FinalizedLexerResult, LexerResult, SourceCodeScanner};

/// A single lexical token of the Foreach language.
///
/// `'a` is the lifetime of the source string being lexed; `Identifier`
/// borrows its text directly from that string rather than allocating.
#[derive(Debug, Clone)]
enum ForeachToken<'a> {
    /// A maximal run of identifier characters (see `is_identifier_char`).
    /// Covers alphanumeric names (e.g. `inp`) as well as symbolic ones
    /// (e.g. `!`, `&&`), since Foreach treats both the same way.
    Identifier(&'a str),
    OpenBracket,  // [
    CloseBracket, // ]
    OpenBrace,    // {
    CloseBrace,   // }
    Semicolon,    // ;
    Assign,       // =
    ConstAssign,  // :=
    Foreach,      // =>
    Return,       // ->
}

/// Tokenizes a string according to Foreach grammar.
fn tokenize(source: &str) -> FinalizedLexerResult<ForeachToken<'_>> {
    use ForeachToken::{
        Assign, CloseBrace, CloseBracket, ConstAssign, Foreach, Identifier, OpenBrace, OpenBracket,
        Return, Semicolon,
    };

    // Create the reader context
    let context = SourceCodeScanner::new(source);
    let mut result = LexerResult::new();

    // Iterate as long as more characters exist in the lexer
    while context.has_next() {
        let Spanned {
            span,
            data: identifier,
        } = context.capture_str(|| {
            while let Some(c) = context.peek() {
                if is_identifier_char(c) {
                    context.skip();
                } else {
                    break;
                }
            }
        });

        // If span is None, then 0 characters were read; i.e. there is no identifier.
        if span.is_empty() {
            // Because there's no identifier here, push a single-character token, if there is one.
            // Consume a single character either way.
            let Spanned { span, data } = context.next_span().unwrap();

            let token = match data {
                '[' => OpenBracket,
                ']' => CloseBracket,
                '{' => OpenBrace,
                '}' => CloseBrace,
                ';' => Semicolon,
                _ => continue, // Any other character will just be ignored.
            };

            result.push_token(Token::new(token, span));
            continue;
        };

        // "//" will be matched as an identifier due to language rules.
        // If it's found, then skip until the next newline and continue.
        // Note: Something like "A//" passes this check, this is correct behavior.
        if identifier.starts_with("//") {
            context.skip_until('\n');
            continue;
        }

        // Create a token from the identifier. Some specific identifier are their own tokens.
        let token = match identifier {
            "=" => Assign,
            ":=" => ConstAssign,
            "=>" => Foreach,
            "->" => Return,
            _ => Identifier(identifier),
        };

        // Push the token from above along with the identifier's span.
        result.push_token(Token::new(token, span));
    }

    // Return the result
    result.finalize()
}

/// Returns true if the input is a valid identifier char.
///
/// An identifier char is any non-whitespace character that is not one of
/// the punctuation characters `;`, `[`, `]`, `{`, `}`.
fn is_identifier_char(x: char) -> bool {
    !(x.is_whitespace() || matches!(x, ';' | '[' | ']' | '{' | '}'))
}

fn main() {
    // A sample Foreach program exercising every token kind: constant
    // assignments for the boolean encodings, plus NOT / AND / OR / XOR
    // written with foreach (`=>`) loops over nested arrays.
    let source = r#"
    false :=   [];
    true  := [[]];

    // True -> False, False -> True
    ! inp { 
        v := inp => -> false; 
        -> true; 
    }

    // True if input array contains only truthy values.
        && inp {
        v := inp => _ := ! v => -> false;
        -> true;
    }

    // True if input array contains at least 1 truthy value.
    || inp v := inp => _ := v => -> true;

    // True if number of truthy values in input array is odd 
        ^ inp {
        out = false;
        v := inp => _ := v => out = ! out;
        -> out;
    }

    "#;

    // Lex the sample and pretty-print the resulting token stream.
    let result = tokenize(source);

    println!("{result:#?}");
}