// php-literal-parser 0.7.1
//
// Parser for PHP literals.
// Documentation
use logos::{Lexer, Logos, Span};
use parse_display::Display;
use std::fmt::{Debug, Formatter};

/// Lexical tokens of a PHP literal expression.
///
/// Line comments (`#` / `//`), block comments and whitespace are skipped by
/// the lexer and never surface as tokens.
#[derive(Logos, Debug, PartialEq, Clone, Copy, Display)]
// Line comments: `#` or `//` up to (not including) the next newline.
#[logos(skip r"(#|//)[^\n]*")]
// Block comments.
// NOTE(review): this pattern needs at least one character between the
// delimiters and cannot end on `**/`, so `/**/` and `/* x **/` are not skipped.
#[logos(skip r"/\*([^*]|\*[^/])+\*/")]
// Whitespace. `\r` is included so that input with CRLF line endings lexes
// cleanly instead of producing an error for every carriage return.
#[logos(skip r"[ \t\r\n\f]+")]
pub enum Token {
    /// The `array` keyword, matched case-insensitively like the other PHP
    /// keywords recognised here (`true`, `false`, `null`).
    #[regex("(?i)array")]
    #[display("'array'")]
    Array,
    /// `true` or `false`, matched case-insensitively.
    #[regex("(?i:true|false)")]
    #[display("boolean literal")]
    Bool,
    /// `null`, matched case-insensitively.
    #[regex("(?i)null")]
    #[display("'null'")]
    Null,
    /// The key/value separator `=>`.
    #[token("=>")]
    #[display("'=>'")]
    Arrow,
    /// Opening parenthesis `(`.
    #[token("(")]
    #[display("'('")]
    BracketOpen,
    /// Closing parenthesis `)`.
    #[token(")")]
    #[display("')'")]
    BracketClose,
    /// Opening square bracket `[`.
    #[token("[")]
    #[display("'['")]
    SquareOpen,
    /// Closing square bracket `]`.
    #[token("]")]
    #[display("']'")]
    SquareClose,
    /// Element separator `,`.
    #[token(",")]
    #[display("','")]
    Comma,
    /// A single- or double-quoted string, including its quotes; a backslash
    /// escapes the character that follows it.
    #[display("string literal")]
    #[regex("(\"([^\"\\\\]|\\\\.)*\")|(\'([^\'\\\\]|\\\\.)*\')")]
    LiteralString,
    /// A floating point literal: digits with a decimal point and/or an
    /// exponent, optional leading `-`, `_` allowed between digits.
    #[display("float literal")]
    #[regex("-?((([0-9]+(_[0-9]+)*|([0-9]*(_[0-9]+)*[\\.][0-9]+(_[0-9]+)*)|([0-9]+(_[0-9]+)*[\\.][0-9]*(_[0-9]+)*)))[eE][+-]?[0-9]+(_[0-9]+)*|([0-9]*(_[0-9]+)*[\\.][0-9]+(_[0-9]+)*)|([0-9]+(_[0-9]+)*[\\.][0-9]*(_[0-9]+)*))")]
    Float,
    /// An integer literal: decimal, hexadecimal (`0x`), octal (leading `0`)
    /// or binary (`0b`), optional leading `-`, `_` allowed between digits.
    #[display("integer literal")]
    #[regex("-?(0|[1-9][0-9]*(_[0-9]+)*|0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)*|0[0-7]+(_[0-7]+)*|0[bB][01]+(_[01]+)*)")]
    Integer,
    /// Statement terminator `;`.
    #[token(";")]
    #[display("';'")]
    SemiColon,
    /// Stand-in for input that matched none of the rules above; it has no
    /// lexer rule of its own.
    Error,
}

impl Token {
    pub(crate) fn is_valid_array_key(&self) -> bool {
        matches!(
            self,
            Token::Integer | Token::LiteralString | Token::Bool | Token::Float | Token::Null
        )
    }
}

/// Lexes a representative `array (...)` literal and checks the exact token
/// sequence, including the end of the stream.
#[test]
fn test_lex() {
    let source = r###"
    array (
        "double" => "quote",
        'single' => 'quote',
        "escaped" => "\"quote\"",
        1 => 2,
        "nested" => [
            "sub" => "key",
        ],
        "array" => [1,2,3,4],
        "bool" => false,
        "negative" => -1,
        "null" => Null,
    )
    "###;

    // One group per line of `source`, in order.
    let expected = [
        // array (
        Token::Array,
        Token::BracketOpen,
        // "double" => "quote",
        Token::LiteralString,
        Token::Arrow,
        Token::LiteralString,
        Token::Comma,
        // 'single' => 'quote',
        Token::LiteralString,
        Token::Arrow,
        Token::LiteralString,
        Token::Comma,
        // "escaped" => "\"quote\"",
        Token::LiteralString,
        Token::Arrow,
        Token::LiteralString,
        Token::Comma,
        // 1 => 2,
        Token::Integer,
        Token::Arrow,
        Token::Integer,
        Token::Comma,
        // "nested" => [
        Token::LiteralString,
        Token::Arrow,
        Token::SquareOpen,
        // "sub" => "key",
        Token::LiteralString,
        Token::Arrow,
        Token::LiteralString,
        Token::Comma,
        // ],
        Token::SquareClose,
        Token::Comma,
        // "array" => [1,2,3,4],
        Token::LiteralString,
        Token::Arrow,
        Token::SquareOpen,
        Token::Integer,
        Token::Comma,
        Token::Integer,
        Token::Comma,
        Token::Integer,
        Token::Comma,
        Token::Integer,
        Token::SquareClose,
        Token::Comma,
        // "bool" => false,
        Token::LiteralString,
        Token::Arrow,
        Token::Bool,
        Token::Comma,
        // "negative" => -1,
        Token::LiteralString,
        Token::Arrow,
        Token::Integer,
        Token::Comma,
        // "null" => Null,
        Token::LiteralString,
        Token::Arrow,
        Token::Null,
        Token::Comma,
        // )
        Token::BracketClose,
    ];

    let mut lex = Token::lexer(source);
    for (position, wanted) in expected.iter().copied().enumerate() {
        assert_eq!(
            lex.next().transpose().unwrap(),
            Some(wanted),
            "unexpected token at position {}",
            position
        );
    }

    // Nothing may follow the closing bracket.
    assert_eq!(lex.next().transpose().unwrap(), None);
}

/// Every supported integer notation (zero, decimal, hex, octal, binary,
/// underscore-separated) must lex as a single `Integer` token.
#[test]
fn test_lex_int() {
    let source = r###"0,123,0x123,0123,0b111,12_34_56"###;
    let mut lex = Token::lexer(source);

    // Six comma-separated literals: a comma precedes all but the first.
    let literal_count = 6;
    for index in 0..literal_count {
        if index > 0 {
            assert_eq!(lex.next().transpose().unwrap(), Some(Token::Comma));
        }
        assert_eq!(lex.next().transpose().unwrap(), Some(Token::Integer));
    }

    assert_eq!(lex.next().transpose().unwrap(), None);
}

/// Every supported float notation (leading dot, decimal point, exponents
/// with and without sign, underscore-separated) must lex as a single `Float`.
#[test]
fn test_lex_float() {
    let source = r###".1,123.0,123e1,123e+1,123e-1,1_23.456"###;
    let mut lex = Token::lexer(source);

    // Six comma-separated literals: a comma precedes all but the first.
    let literal_count = 6;
    for index in 0..literal_count {
        if index > 0 {
            assert_eq!(lex.next().transpose().unwrap(), Some(Token::Comma));
        }
        assert_eq!(lex.next().transpose().unwrap(), Some(Token::Float));
    }

    assert_eq!(lex.next().transpose().unwrap(), None);
}

/// Block comments (multi-line and inline) and `//` line comments must be
/// skipped without producing tokens.
#[test]
fn test_lex_comments() {
    let source = r###"
    array (
        /**
         * multi line comment
         */
        "double" => /** inline commend */ "quote", //line comment
    )
    "###;
    let mut lex = Token::lexer(source);

    assert_eq!(lex.next().transpose().unwrap(), Some(Token::Array));
    assert_eq!(lex.next().transpose().unwrap(), Some(Token::BracketOpen));

    assert_eq!(lex.next().transpose().unwrap(), Some(Token::LiteralString));
    assert_eq!(lex.next().transpose().unwrap(), Some(Token::Arrow));
    assert_eq!(lex.next().transpose().unwrap(), Some(Token::LiteralString));
    assert_eq!(lex.next().transpose().unwrap(), Some(Token::Comma));

    assert_eq!(lex.next().transpose().unwrap(), Some(Token::BracketClose));

    // Like the sibling tests, make sure the stream is exhausted so that a
    // stray token after the closing bracket cannot slip through unnoticed.
    assert_eq!(lex.next().transpose().unwrap(), None);
}

/// A token paired with its location in the input it was lexed from.
#[derive(Clone)]
pub struct SpannedToken<'source> {
    /// Kind of the token.
    pub token: Token,
    /// Byte range of the token within `source`.
    pub span: Span,
    /// The complete input string that `span` indexes into.
    pub source: &'source str,
}

impl<'source> SpannedToken<'source> {
    /// Returns the text of `source` covered by `span`.
    ///
    /// # Panics
    ///
    /// Panics if `span` does not denote a valid sub-range of `source`.
    pub fn slice(&self) -> &'source str {
        let full_input: &'source str = self.source;
        &full_input[self.span.start..self.span.end]
    }
}

impl Debug for SpannedToken<'_> {
    /// Formats as the token kind followed by the quoted source text it covers.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let kind = &self.token;
        let text = self.slice();
        write!(f, "SpannedToken {{ {:?}: \"{}\"}} ", kind, text)
    }
}

/// Iterator adapter over a [`Lexer`] that yields [`SpannedToken`]s.
pub struct TokenStream<'source> {
    /// The wrapped lexer that produces the raw tokens.
    lexer: Lexer<'source, Token>,
}

impl<'source> TokenStream<'source> {
    /// Wraps `lexer` in a token stream.
    pub fn new(lexer: Lexer<'source, Token>) -> Self {
        Self { lexer }
    }
}

impl<'source> Iterator for TokenStream<'source> {
    type Item = SpannedToken<'source>;

    fn next(&mut self) -> Option<Self::Item> {
        let token = self.lexer.next()?.unwrap_or(Token::Error);
        Some(SpannedToken {
            token,
            span: self.lexer.span(),
            source: self.lexer.source(),
        })
    }
}