alkale 2.0.0

A simple LL(1) lexer library for Rust.
Documentation
//! This example tokenizes a slightly modified form of JSON.

#![allow(missing_docs)]

use alkale::{
    common::string::{ParseCharError, StringTokenError},
    format_notification, map_single_char_token,
    notification::{NotificationBuilder, NotificationSeverity},
    span::Spanned,
    token::Token,
    LexerResult, SourceCodeScanner,
};

/// A single lexical token of the JSON-like language recognized by this example.
///
/// `PartialEq` is derived so tokens can be compared directly (for instance with
/// `assert_eq!`); `Eq` is intentionally not derived because `Number` carries an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonToken {
    /// The `{` character.
    OpenBrace,
    /// The `}` character.
    CloseBrace,
    /// The `[` character.
    OpenBracket,
    /// The `]` character.
    CloseBracket,
    /// The `:` character.
    Colon,
    /// The `,` character.
    Comma,
    /// The `null` keyword.
    Null,
    /// The `true` or `false` keyword, carrying its value.
    Bool(bool),
    /// The value produced by parsing a string literal.
    Str(String),
    /// A floating-point number, already negated if a `-` sign preceded it.
    Number(f64),
}

/// Tokenizes a hard-coded JSON-like sample and pretty-prints the finalized lexer result.
///
/// Each loop iteration tries, in order: single-character punctuation, keyword identifiers
/// (`null`, `true`, `false`), quoted strings, optionally negated floating-point numbers, and
/// finally whitespace. Input that fits none of these is reported as an error notification on
/// the result, and scanning then carries on with the rest of the source.
pub fn main() {
    use JsonToken::{
        Bool, CloseBrace, CloseBracket, Colon, Comma, Null, Number, OpenBrace, OpenBracket, Str,
    };

    let code = r##"
        {
            "a": "Assigned to the \"a\" property!",
            "b" : 25.2,
            "c": false,
            "d": true,
            "12": null,
            "list": [
                1,
                -2e+2,
                3.1
            ]
        }
    "##;

    let context = SourceCodeScanner::new(code);
    let mut result = LexerResult::<JsonToken, ()>::new();

    while context.has_next() {
        // Map single-character tokens to their data.
        // NOTE(review): presumably the macro pushes the token and restarts the loop when one
        // of these characters matches; confirm against the macro definition.
        map_single_char_token!(&context, &mut result,
            '{' => OpenBrace,
            '}' => CloseBrace,
            '[' => OpenBracket,
            ']' => CloseBracket,
            ':' => Colon,
            ',' => Comma,
        );

        // Attempt to parse an identifier; only the three JSON keywords are accepted.
        if let Some(Spanned { data, span }) = context.try_consume_standard_identifier() {
            let tk = match data {
                "null" => Some(Null),
                "true" => Some(Bool(true)),
                "false" => Some(Bool(false)),
                _ => None,
            };

            // A recognized keyword becomes a token; any other identifier is reported as an
            // error. Either way, restart the loop.
            if let Some(tk) = tk {
                result.push_token(Token::new(tk, span));
            } else {
                format_notification!("Unexpected identifier {data}")
                    .span(span)
                    .severity(NotificationSeverity::Error)
                    .report(&mut result);
            }

            continue;
        }

        // If the next element in the source code is a string, parse it and report errors as
        // necessary.
        if let Some(Spanned { data, span }) = context.try_parse_strict_string() {
            match data {
                Ok(string) => {
                    // Push the valid string.
                    result.push_token(Token::new(Str(string), span));
                }
                Err(errors) => {
                    use ParseCharError::{
                        IllegalEscape, NoCharFound, NoEscape, UnescapedDelimiter,
                    };
                    use StringTokenError::{CharError, NoClosingDelimiter};

                    // Create a notification for every error.
                    for error in errors {
                        let builder = match error {
                            // These two variants carry their own span, which shadows the
                            // span of the whole string.
                            CharError(NoEscape(span)) => {
                                NotificationBuilder::new("Missing escape code after backslash")
                                    .span(span)
                            }
                            CharError(IllegalEscape(char, span)) => {
                                format_notification!("Illegal escape code '{char}'").span(span)
                            }
                            CharError(NoCharFound) => {
                                unreachable!("Strings will never create this error")
                            }
                            CharError(UnescapedDelimiter(_)) => {
                                unreachable!("String methods cannot create this error")
                            }
                            // The remaining arms point at the span of the whole string.
                            NoClosingDelimiter => {
                                NotificationBuilder::new("Missing closing delimiter on string")
                                    .span(span)
                            }
                            _ => NotificationBuilder::new("Unknown error").span(span),
                        };

                        builder
                            .severity(NotificationSeverity::Error)
                            .report(&mut result);
                    }
                }
            }
            continue;
        }

        // If the next character is a minus sign, skip it and set
        // this variable to its span. Otherwise, set it to None.
        //
        // Note: Negatives are parsed during lexing because no subtraction
        // exists in JSON, every - sign is unary so there's no ambiguity.
        let negative_sign_span = if context.peek_is('-') {
            let span = context
                .next_span()
                .expect("peek_is('-') just confirmed that a character is available")
                .span;
            // Whitespace between the sign and the digits is tolerated.
            context.skip_whitespace();
            Some(span)
        } else {
            None
        };

        // Attempt to parse a floating point number. If one was found and it was valid, push
        // a token for it, otherwise report a parsing error.
        if let Some(Spanned { data, span }) = context.try_parse_float() {
            if let Ok(number) = data {
                // If a negative sign was found prior, negate the number.
                // This is completely lossless because floating point numbers use a sign bit.
                let number = if negative_sign_span.is_some() {
                    -number
                } else {
                    number
                };

                // NOTE(review): `span` comes from the float parse alone, so the token's span
                // does not appear to include the preceding minus sign; confirm this is intended.
                result.push_token(Token::new(Number(number), span));
            } else {
                NotificationBuilder::new("Floating-point number is malformed")
                    .severity(NotificationSeverity::Error)
                    .span(span)
                    .report(&mut result);
            }

            continue;
        } else if let Some(span) = negative_sign_span {
            // If no number was found, but we DID find a negative sign, then that negative sign
            // is alone and is thus invalid.
            NotificationBuilder::new("Negative sign should have a number after it")
                .severity(NotificationSeverity::Error)
                .span(span)
                .report(&mut result);

            continue;
        }

        // If whitespace is found, skip it and continue, otherwise report an error indicating
        // this is an unknown character.
        if context.peek_is_map(char::is_whitespace) {
            context.skip_whitespace();
        } else {
            // Consuming the offending character here also guarantees the loop makes progress.
            let Spanned { data, span } = context
                .next_span()
                .expect("a character should remain: has_next() was true and nothing consumed it");

            format_notification!("Unexpected character '{data}'")
                .severity(NotificationSeverity::Error)
                .span(span)
                .report(&mut result);
        }
    }

    println!("{:#?}", result.finalize());
}