[][src]Module wlambda::parser

This is the grammar parser for WLambda.

It produces an AST to be further transformed by wlambda::compiler::compile() into an executable form of the program.

The parser is a bit crufty as I did not go the extra step of writing a lexer/tokenizer. One goal of WLambda is to have a rather uncomplicated and small implementation, and I hope I could achieve that here.

The syntax of WLambda was, in part, also designed to make it a bit easier to parse with this hand-written parser.

Full WLambda Lexical Syntax and Grammar

White space is everything that satisfies std::char::is_whitespace, so unicode white space is respected. Comments have the following syntax:

    comment = "#" ?anything except "\n"? "\n"

In the following grammar, white space and comments are omitted:


    ident_start   = ( ?alphabetic? | "_" | "@" )
    ident_end     = { ?any character?
                     - ( ?white space?
                         | "." | "," | ";"
                         | "{" | "}" | "[" | "]" | "(" | ")"
                         | "~" | "|" | "=" ) }
                  ;
    qident        = ident_end
                  (* a quoted identifier can not appear anywhere,
                     it's usually delimited or follows something that
                     makes sure we are now expecting an identifier *)
                  | "`", { ?any character except '`'? }, "`" (* quoted identifier *)
                  ;
    ident         = ident_start, [ ident_end ]
                  | "`", { ?any character except '`'? }, "`" (* quoted identifier *)
                  ;
    ref_specifier = ":", qident
                  ;

    digit         = "0" | "1" | "2" | "3" | "4" | "5"
                  | "6" | "7" | "8" | "9"
                  ;
    integer       = digit, { digit }
                  ;
    radix         = integer
                  ;
    radix_digits  = (* digits in the radix specified
                       earlier in the number.
                       Default radix is of course 10. *)
                  ;
    number        = [ "-" | "+" ],
                    [ ( radix, "r"
                      | "0", "x"
                      | "0", "b"
                      | "0", "o"
                      ) ],
                    radix_digits,
                    [ ".", radix_digits ]
                  ;
    hexdigit      = ?hexdigit, upper or lower case?
                  ;
    string_escape = "x", hexdigit, hexdigit  (* byte/ascii escape *)
                  | "n"                      (* newline *)
                  | "r"                      (* carriage return *)
                  | "t"                      (* horizontal tab *)
                  | "0"                      (* nul byte/char *)
                  | "u", hexdigit, { hexdigit }
                                             (* unicode char, or in byte strings
                                                their utf-8 encoded form *)
                  | "\""
                  | "\'"
                  | "\\"
                  ;
    string        = "\"", { "\\", string_escape | ?any character? - "\\" },"\""
                  ;
    byte_string   = "b", string
                  ;
    quote_string  = "q", ?any character as quote?, { ?any character? }, ?any character as quote?
                  | "Q", ?any character as quote?, { ?any character? }, ?any character as quote?
                    (* but Q generates a byte string instead! *)
                  ;
    list_expr     = "*", expr   (* splices the vector result of 'expr'
                                   into the currently parsed list *)
                  | expr
                  ;
    list          = "[", [ list_expr, { ",", list_expr }, [ "," ] ],"]"
                  ;
    map_expr      = (ident | expr), "=", expr
                  | "*", expr   (* splices the map result of 'expr'
                                   into the currently parsed map *)
                  ;
    map           = "{", [ map_expr, { ",", map_expr }, [ "," ] ], "}"
                  ;
    self          = "s" | "self"
                  ;
    true          = "t" | "true"
                  ;
    false         = "f" | "false"
                  ;
    none          = "n" | "none"
                  ;
    err           = ("e" | "error"), expr
                  ;
    ref           = "&&", value
                  ;
    wref          = "&", value
                  ;
    accumulator   = "@", ("i" | "int"
                         |"s" | "string"
                         |"f" | "float"
                         |"b" | "bytes"
                         |"v" | "vec"
                         |"m" | "map" ), expr
                    (* defines a new accumulator context *)
                  | "@", ("a" | "accum")
                    (* returns the current accumulator value *)
                  | "+" (* resolves to the current accumulator function *)
                  ;
    capture_ref   = ":", var
                  ;
    deref         = "*", value
                  ;
    special_value = byte_string
                  | quote_string
                  | list
                  | map
                  | none
                  | true
                  | false
                  | self
                  | err
                  | ref
                  | wref
                  | deref
                  | capture_ref
                  | accumulator
                  ;
    arity_def     = "|", number, "<", number, "|" (* set min/max *)
                  | "|", number, "|"              (* set min and max *)
                  | "|", "|"                      (* no enforcement *)
                  ;
    function      = [ "\:", ident ], "{", [ arity_def ], block, "}"
                  | "\", [ arity_def ], statement
                  ;
    var           = ident
                  ;
    symbol        = ":", qident
                  | ":", "\"", (? any char, quoted \\ and \" ?), "\""
                  (*
                     symbols are usually used to specify
                     fields in literal map definitions
                     and lots of other places as stringy sentinel values
                  *)
                  ;
    value         = number
                  | string
                  | "$", special_value
                  | "(", expr, ")"
                  | function
                  | symbol
                  | var
                  ;
    op            = (* here all operators are listed line by line regarding
                       their precedence, top to bottom *)
                    "^"
                  | "*" | "/" | "%"
                  | "-" | "+"
                  | "<<" | ">>"       (* binary shift *)
                  | "<" | ">" | "<=" | ">="
                  | "==" | "!="
                  | "&"               (* binary and *)
                  | "&^"              (* binary xor *)
                  | "&|"              (* binary or *)
                  | "&and"            (* logical and, short circuit *)
                  | "&or"             (* logical or, short circuit *)
                  ;
    bin_op        = call_no_ops, { op, bin_op } (* precedence parsing is done
                                                   in a Pratt parser style *)
                  ;
    arg_list      = "[", [ expr, { ",", expr }, [ "," ] ], "]"
                  | "[[", expr, "]]"  (* apply result vector of expr as argument list *)
                  ;
    field         = ".", ( integer | ident | value ), [ field ]
                  ;
    field_access  = field, "=", expr
                  | field, arg_list
                  | field
                  (* please note, that a field access like:
                     `obj.field` is equivalent to the call:
                     `field[obj]`. That also means that
                     `obj.field[...]` is transformed into
                     `field[obj][...]`.
                     The exception is "=" which assigns
                     the field as specified.
                     BUT: There is a special case, when you specify
                     an `ident` it is quoted and interpreted as symbol. *)
                  ;
    call_no_ops   = value, { arg_list | field_access }, [ "~", expr ]
                  ;
    call          = value,
                    { arg_list | field_access | bin_op | value },
                    [ "~", expr ] (* this is a tail argument, if present the
                                     expr is appended to the argument list *)
                  ;
    expr          = call, { "|", call }
                  ;
    simple_assign = qident, "=", expr
                  ;
    destr_assign  = "(", [ qident, { ",", qident } ], ")", "=", expr
                  ;
    definition    = [ ref_specifier ], ( simple_assign | destr_assign )
                  ;
    import        = "!", "@import", symbol, [ "=" ], symbol
                  | "!", "@wlambda"
                  ;
    export        = "!", "@export", symbol, [ "=" ], expr
                  ;
    statement     = "!", definition
                  | ".", simple_assign
                  | ".", destr_assign
                  | import
                  | export
                  | expr
                  ;
    block         = "{", { statement, ";", {";"}}, [ statement, {";"} ], "}"
                  | { statement, ";", {";"} }, [ statement, {";"} ]
                  ;
    code          = block
                  ;

Re-exports

pub use state::State;
pub use state::ParseError;

Modules

state

Functions

parse

Facade function for an undelimited parse_block.

parse_block

This function parses an optionally delimited block of WLambda statements.