Module wlambda::parser
This is the grammar parser for WLambda.
It produces an AST to be further transformed by wlambda::compiler::compile() into an executable form of the program.
The parser is a bit crufty as I did not go the extra step of writing a lexer/tokenizer. One goal of WLambda is to have a rather uncomplicated and small implementation, and I hope I could achieve that here.
The syntax of WLambda was, in part, also designed to make it a bit easier to parse with this hand-written parser.
Full WLambda Lexical Syntax and Grammar
White space is everything that satisfies std::char::is_whitespace, so Unicode white space is respected. Comments have the following syntax:
comment = "#" ?anything except "\n"? "\n"
In the following grammar, white space and comments are omitted:
ident_start = ( ?alphabetic? | "_" | "@" )
;
ident_end = { ?any character?
- ( ?white space?
| "." | "," | ";"
| "{" | "}" | "[" | "]" | "(" | ")"
| "~" | "|" | "=" ) }
;
qident = ident_end
(* a quoted identifier cannot appear just anywhere,
it's usually delimited or follows something that
makes sure we are now expecting an identifier *)
| "`", { ?any character except '`'? }, "`" (* quoted identifier *)
;
ident = ident_start, [ ident_end ]
| "`", { ?any character except '`'? }, "`" (* quoted identifier *)
;
ref_specifier = ":", qident
;
digit = "0" | "1" | "2" | "3" | "4" | "5"
| "6" | "7" | "8" | "9"
;
integer = digit, { digit }
;
radix = integer
;
radix_digits = (* digits in the radix specified
earlier in the number.
Default radix is of course 10. *)
;
number = [ "-" | "+" ],
[ ( radix, "r"
| "0", "x"
| "0", "b"
| "0", "o"
) ],
radix_digits,
[ ".", radix_digits ]
;
hexdigit = ?hexdigit, upper or lower case?
;
string_escape = "x", hexdigit, hexdigit (* byte/ascii escape *)
| "n" (* newline *)
| "r" (* carriage return *)
| "t" (* horizontal tab *)
| "0" (* nul byte/char *)
| "u", hexdigit, { hexdigit }
(* unicode char, or in byte strings
their utf-8 encoded form *)
| "\""
| "\'"
| "\\"
;
string = "\"", { "\\", string_escape | ?any character? - "\\" },"\""
;
byte_string = "b", string
;
quote_string = "q", ?any character as quote?, { ?any character? }, ?any character as quote?
| "Q", ?any character as quote?, { ?any character? }, ?any character as quote?
(* but Q generates a byte string instead! *)
;
list_expr = "*", expr (* splices the vector result of 'expr'
into the currently parsed list *)
| expr
;
list = "[", [ list_expr, { ",", list_expr }, [ "," ] ],"]"
;
map_expr = (ident | expr), "=", expr
| "*", expr (* splices the map result of 'expr'
into the currently parsed map *)
;
map = "{", [ map_expr, { ",", map_expr }, [ "," ] ], "}"
;
self = "s" | "self"
;
true = "t" | "true"
;
false = "f" | "false"
;
none = "n" | "none"
;
err = ("e" | "error"), expr
;
ref = "&&", value
;
wref = "&", value
;
accumulator = "@", ("i" | "int"
|"s" | "string"
|"f" | "float"
|"b" | "bytes"
|"v" | "vec"
|"m" | "map" ), expr
(* defines a new accumulator context *)
| "@", ("a" | "accum")
(* returns the current accumulator value *)
| "+" (* resolves to the current accumulator function *)
;
capture_ref = ":", var
;
deref = "*", value
;
special_value = byte_string
| quote_string
| list
| map
| none
| true
| false
| self
| err
| ref
| wref
| deref
| capture_ref
| accumulator
;
arity_def = "|", number, "<", number, "|" (* set min/max *)
| "|", number, "|" (* set min and max *)
| "|", "|" (* no enforcement *)
;
function = [ "\\:", ident ], "{", [ arity_def ], block, "}"
| "\\", [ arity_def ], statement
;
var = ident
;
symbol = ":", qident
| ":", "\"", (? any char, quoted \\ and \" ?), "\""
(*
symbols are usually used to specify
fields in literal map definitions
and lots of other places as stringy sentinel values
*)
;
value = number
| string
| "$", special_value
| "(", expr, ")"
| function
| symbol
| var
;
op = (* here all operators are listed line by line regarding
their precedence, top to bottom *)
"^"
| "*" | "/" | "%"
| "-" | "+"
| "<<" | ">>" (* binary shift *)
| "<" | ">" | "<=" | ">="
| "==" | "!="
| "&" (* binary and *)
| "&^" (* binary xor *)
| "&|" (* binary or *)
| "&and" (* logical and, short circuit *)
| "&or" (* logical or, short circuit *)
;
bin_op = call_no_ops, { op, bin_op } (* precedence parsing is done
in a Pratt parser style *)
;
arg_list = "[", [ expr, { ",", expr }, [ "," ] ], "]"
| "[[", expr, "]]" (* apply result vector of expr as argument list *)
;
field = ".", ( integer | ident | value ), [ field ]
;
field_access = field, "=", expr
| field, arg_list
| field
(* please note, that a field access like:
`obj.field` is equivalent to the call:
`field[obj]`. That also means that
`obj.field[...]` is transformed into
`field[obj][...]`.
The exception is "=" which assigns
the field as specified.
BUT: There is a special case, when you specify
an `ident`, it is quoted and interpreted as a symbol. *)
;
call_no_ops = value, { arg_list | field_access }, [ "~", expr ]
;
call = value,
{ arg_list | field_access | bin_op | value },
[ "~", expr ] (* this is a tail argument, if present the
expr is appended to the argument list *)
;
expr = call, { "|", call }
;
simple_assign = qident, "=", expr
;
destr_assign = "(", [ qident, { ",", qident } ], ")", "=", expr
;
definition = [ ref_specifier ], ( simple_assign | destr_assign )
;
import = "!", "@import", symbol, [ "=" ], symbol
| "!", "@wlambda"
;
export = "!", "@export", symbol, [ "=" ], expr
;
statement = "!", definition
| ".", simple_assign
| ".", destr_assign
| import
| export
| expr
;
block = "{", { statement, ";", {";"}}, [ statement, {";"} ], "}"
| { statement, ";", {";"} }, [ statement, {";"} ]
;
code = block
;
Re-exports
pub use state::State; |
pub use state::ParseError; |
Modules
state |
Functions
parse | Facade function for an undelimited parse_block. |
parse_block | This function parses an optionally delimited block of WLambda statements. |