// mun_syntax 0.4.0
//
// Parsing functionality for the Mun programming language
// Documentation
use std::fmt::Write;

/// Renders `tokens` as text, one line per token, in the form
/// `<kind:?> <byte length> <token text:?>`.
///
/// The tokens are assumed to tile `text` from its start: each token's text
/// is taken from the byte range that begins where the previous token ended.
///
/// # Panics
///
/// Panics if a token's byte range lies outside `text` or does not fall on
/// UTF-8 character boundaries.
fn dump_tokens(tokens: &[crate::Token], text: &str) -> String {
    let mut rendered = String::new();
    // Byte position in `text` where the next token begins.
    let mut start = 0;
    for tok in tokens.iter() {
        let width = Into::<u32>::into(tok.len) as usize;
        let end = start + width;
        let slice = &text[start..end];
        start = end;
        writeln!(rendered, "{:?} {} {:?}", tok.kind, width, slice).unwrap();
    }
    rendered
}

/// Tokenizes `text` with `crate::tokenize` and returns the `dump_tokens`
/// rendering of the resulting tokens.
fn dump_text_tokens(text: &str) -> String {
    dump_tokens(&crate::tokenize(text), text)
}

/// Snapshot test: token dump for decimal, hexadecimal, exponent,
/// underscore-separated, suffixed and octal number literals.
#[test]
fn numbers() {
    let input = r#"
    1.34
    0x3Af
    1e-3
    100_000
    0x3a_u32
    1f32
    0o71234"#;
    let dumped = dump_text_tokens(input);
    insta::assert_snapshot!(dumped, @r###"
    WHITESPACE 5 "\n    "
    FLOAT_NUMBER 4 "1.34"
    WHITESPACE 5 "\n    "
    INT_NUMBER 5 "0x3Af"
    WHITESPACE 5 "\n    "
    FLOAT_NUMBER 4 "1e-3"
    WHITESPACE 5 "\n    "
    INT_NUMBER 7 "100_000"
    WHITESPACE 5 "\n    "
    INT_NUMBER 8 "0x3a_u32"
    WHITESPACE 5 "\n    "
    INT_NUMBER 4 "1f32"
    WHITESPACE 5 "\n    "
    INT_NUMBER 7 "0o71234"
    "###);
}

/// Snapshot test: token dump for line comments and for empty, single-line,
/// multi-line, nested and unclosed block comments.
#[test]
fn comments() {
    let input = r#"
    // hello, world!
    /**/
    /* block comment */
    /* multi
       line
       comment */
    /* /* nested */ */
    /* unclosed comment"#;
    let dumped = dump_text_tokens(input);
    insta::assert_snapshot!(dumped, @r###"
    WHITESPACE 5 "\n    "
    COMMENT 16 "// hello, world!"
    WHITESPACE 5 "\n    "
    COMMENT 4 "/**/"
    WHITESPACE 5 "\n    "
    COMMENT 19 "/* block comment */"
    WHITESPACE 5 "\n    "
    COMMENT 38 "/* multi\n       line\n       comment */"
    WHITESPACE 5 "\n    "
    COMMENT 18 "/* /* nested */ */"
    WHITESPACE 5 "\n    "
    COMMENT 19 "/* unclosed comment"
    "###);
}

/// Snapshot test: token dump for identifiers separated by runs of spaces,
/// single newlines and a blank line.
#[test]
fn whitespace() {
    let input = r#"
    h e ll  o
    w

    o r     l   d"#;
    let dumped = dump_text_tokens(input);
    insta::assert_snapshot!(dumped, @r###"
    WHITESPACE 5 "\n    "
    IDENT 1 "h"
    WHITESPACE 1 " "
    IDENT 1 "e"
    WHITESPACE 1 " "
    IDENT 2 "ll"
    WHITESPACE 2 "  "
    IDENT 1 "o"
    WHITESPACE 5 "\n    "
    IDENT 1 "w"
    WHITESPACE 6 "\n\n    "
    IDENT 1 "o"
    WHITESPACE 1 " "
    IDENT 1 "r"
    WHITESPACE 5 "     "
    IDENT 1 "l"
    WHITESPACE 3 "   "
    IDENT 1 "d"
    "###);
}

/// Snapshot test: token dump for plain, underscore-containing and non-ASCII
/// identifiers, plus a lone underscore.
#[test]
fn ident() {
    let input = r#"
    hello world_ _a2 _ __ x 即可编著课程
    "#;
    let dumped = dump_text_tokens(input);
    insta::assert_snapshot!(dumped, @r###"
    WHITESPACE 5 "\n    "
    IDENT 5 "hello"
    WHITESPACE 1 " "
    IDENT 6 "world_"
    WHITESPACE 1 " "
    IDENT 3 "_a2"
    WHITESPACE 1 " "
    UNDERSCORE 1 "_"
    WHITESPACE 1 " "
    IDENT 2 "__"
    WHITESPACE 1 " "
    IDENT 1 "x"
    WHITESPACE 1 " "
    IDENT 18 "即可编著课程"
    WHITESPACE 5 "\n    "
    "###);
}

/// Snapshot test: token dump for punctuation and operator characters.
///
/// The snapshot records every multi-character operator in the input as a
/// sequence of single-character tokens (for example `==` as two `EQ`).
#[test]
fn symbols() {
    let input = r#"
    # ( ) { } [ ] ; ,
    = ==
    !=
    < <=
    > >=
    . .. ... ..=
    + +=
    - -=
    * *=
    / /=
    % %=
    << <<=
    >> >>=
    && & &=
    || | |=
    ^ ^=
    : ::
    ->
    "#;
    let dumped = dump_text_tokens(input);
    insta::assert_snapshot!(dumped, @r###"
    WHITESPACE 5 "\n    "
    HASH 1 "#"
    WHITESPACE 1 " "
    L_PAREN 1 "("
    WHITESPACE 1 " "
    R_PAREN 1 ")"
    WHITESPACE 1 " "
    L_CURLY 1 "{"
    WHITESPACE 1 " "
    R_CURLY 1 "}"
    WHITESPACE 1 " "
    L_BRACKET 1 "["
    WHITESPACE 1 " "
    R_BRACKET 1 "]"
    WHITESPACE 1 " "
    SEMI 1 ";"
    WHITESPACE 1 " "
    COMMA 1 ","
    WHITESPACE 5 "\n    "
    EQ 1 "="
    WHITESPACE 1 " "
    EQ 1 "="
    EQ 1 "="
    WHITESPACE 5 "\n    "
    EXCLAMATION 1 "!"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    LT 1 "<"
    WHITESPACE 1 " "
    LT 1 "<"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    GT 1 ">"
    WHITESPACE 1 " "
    GT 1 ">"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    DOT 1 "."
    WHITESPACE 1 " "
    DOT 1 "."
    DOT 1 "."
    WHITESPACE 1 " "
    DOT 1 "."
    DOT 1 "."
    DOT 1 "."
    WHITESPACE 1 " "
    DOT 1 "."
    DOT 1 "."
    EQ 1 "="
    WHITESPACE 5 "\n    "
    PLUS 1 "+"
    WHITESPACE 1 " "
    PLUS 1 "+"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    MINUS 1 "-"
    WHITESPACE 1 " "
    MINUS 1 "-"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    STAR 1 "*"
    WHITESPACE 1 " "
    STAR 1 "*"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    SLASH 1 "/"
    WHITESPACE 1 " "
    SLASH 1 "/"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    PERCENT 1 "%"
    WHITESPACE 1 " "
    PERCENT 1 "%"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    LT 1 "<"
    LT 1 "<"
    WHITESPACE 1 " "
    LT 1 "<"
    LT 1 "<"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    GT 1 ">"
    GT 1 ">"
    WHITESPACE 1 " "
    GT 1 ">"
    GT 1 ">"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    AMP 1 "&"
    AMP 1 "&"
    WHITESPACE 1 " "
    AMP 1 "&"
    WHITESPACE 1 " "
    AMP 1 "&"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    PIPE 1 "|"
    PIPE 1 "|"
    WHITESPACE 1 " "
    PIPE 1 "|"
    WHITESPACE 1 " "
    PIPE 1 "|"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    CARET 1 "^"
    WHITESPACE 1 " "
    CARET 1 "^"
    EQ 1 "="
    WHITESPACE 5 "\n    "
    COLON 1 ":"
    WHITESPACE 1 " "
    COLON 1 ":"
    COLON 1 ":"
    WHITESPACE 5 "\n    "
    MINUS 1 "-"
    GT 1 ">"
    WHITESPACE 5 "\n    "
    "###);
}

/// Snapshot test: token dump for double-quoted and single-quoted strings,
/// strings containing escape sequences, and a string spanning two lines.
#[test]
fn strings() {
    let input = r#"
    "Hello, world!"
    'Hello, world!'
    "\n"
    "\"\\"
    "multi
    line"
    "#;
    let dumped = dump_text_tokens(input);
    insta::assert_snapshot!(dumped, @r###"
    WHITESPACE 5 "\n    "
    STRING 15 "\"Hello, world!\""
    WHITESPACE 5 "\n    "
    STRING 15 "'Hello, world!'"
    WHITESPACE 5 "\n    "
    STRING 4 "\"\\n\""
    WHITESPACE 5 "\n    "
    STRING 6 "\"\\\"\\\\\""
    WHITESPACE 5 "\n    "
    STRING 16 "\"multi\n    line\""
    WHITESPACE 5 "\n    "
    "###);
}

/// Snapshot test: each keyword in the input is dumped with its own
/// `*_KW` token kind.
#[test]
fn keywords() {
    let input = r#"
    break do else false for fn if in nil
    return true while let mut struct class
    never loop pub super self package type
    "#;
    let dumped = dump_text_tokens(input);
    insta::assert_snapshot!(dumped, @r###"
    WHITESPACE 5 "\n    "
    BREAK_KW 5 "break"
    WHITESPACE 1 " "
    DO_KW 2 "do"
    WHITESPACE 1 " "
    ELSE_KW 4 "else"
    WHITESPACE 1 " "
    FALSE_KW 5 "false"
    WHITESPACE 1 " "
    FOR_KW 3 "for"
    WHITESPACE 1 " "
    FN_KW 2 "fn"
    WHITESPACE 1 " "
    IF_KW 2 "if"
    WHITESPACE 1 " "
    IN_KW 2 "in"
    WHITESPACE 1 " "
    NIL_KW 3 "nil"
    WHITESPACE 5 "\n    "
    RETURN_KW 6 "return"
    WHITESPACE 1 " "
    TRUE_KW 4 "true"
    WHITESPACE 1 " "
    WHILE_KW 5 "while"
    WHITESPACE 1 " "
    LET_KW 3 "let"
    WHITESPACE 1 " "
    MUT_KW 3 "mut"
    WHITESPACE 1 " "
    STRUCT_KW 6 "struct"
    WHITESPACE 1 " "
    CLASS_KW 5 "class"
    WHITESPACE 5 "\n    "
    NEVER_KW 5 "never"
    WHITESPACE 1 " "
    LOOP_KW 4 "loop"
    WHITESPACE 1 " "
    PUB_KW 3 "pub"
    WHITESPACE 1 " "
    SUPER_KW 5 "super"
    WHITESPACE 1 " "
    SELF_KW 4 "self"
    WHITESPACE 1 " "
    PACKAGE_KW 7 "package"
    WHITESPACE 1 " "
    TYPE_KW 4 "type"
    WHITESPACE 5 "\n    "
    "###);
}

/// Snapshot test: a string with no closing quote is dumped as a single
/// `STRING` token running to the end of the input.
#[test]
fn unclosed_string() {
    let input = r#"
    "test
    "#;
    let dumped = dump_text_tokens(input);
    insta::assert_snapshot!(dumped, @r###"
    WHITESPACE 5 "\n    "
    STRING 10 "\"test\n    "
    "###);
}

/// Snapshot test: `a == b` written with every combination of spacing around
/// the operator; the snapshot records `==` as two `EQ` tokens in each case.
#[test]
fn binary_cmp() {
    let input = r#"
    a==b
    a ==b
    a== b
    a == b
    "#;
    let dumped = dump_text_tokens(input);
    insta::assert_snapshot!(dumped, @r###"
    WHITESPACE 5 "\n    "
    IDENT 1 "a"
    EQ 1 "="
    EQ 1 "="
    IDENT 1 "b"
    WHITESPACE 5 "\n    "
    IDENT 1 "a"
    WHITESPACE 1 " "
    EQ 1 "="
    EQ 1 "="
    IDENT 1 "b"
    WHITESPACE 5 "\n    "
    IDENT 1 "a"
    EQ 1 "="
    EQ 1 "="
    WHITESPACE 1 " "
    IDENT 1 "b"
    WHITESPACE 5 "\n    "
    IDENT 1 "a"
    WHITESPACE 1 " "
    EQ 1 "="
    EQ 1 "="
    WHITESPACE 1 " "
    IDENT 1 "b"
    WHITESPACE 5 "\n    "
    "###);
}