// Entry point: exactly one literal spanning the entire input.
start = {
    SOI ~ value ~ EOI
}
// Any supported Python literal. The alternatives are tried in the order
// listed, and the first one that matches wins.
value = {
    string
  | bytes
  | number_expr
  | tuple
  | list
  | dict
  | set
  | boolean
  | none
}
// Strings: "string", 'string', """string""", and '''string'''.
// Raw strings are not implemented.
//
// The opening delimiter is pushed onto the stack so that the body rules can
// use PEEK to recognize the matching terminator; POP then consumes it.
//
// The triple-quoted alternative must come first. Choice is ordered and a
// successful alternative is never revisited, so if the single-quote form were
// tried first, the leading `""` of `"""text"""` would be accepted as an empty
// short string and the remaining input would make the overall parse fail.
string = ${
    (PUSH("\"\"\"" | "'''") ~ long_string_body ~ POP) |
    (PUSH("\"" | "'") ~ short_string_body ~ POP)
}
// Body of a single-quoted string: any sequence of literal runs,
// backslash-newline continuations, recognized escapes and unrecognized escapes.
short_string_body = ${
    (
        short_string_non_escape
      | line_continuation_seq
      | string_escape_seq
      | string_unknown_escape
    )*
}
// One or more characters, none of which is a backslash, starts a newline, or
// starts the delimiter currently on top of the stack.
short_string_non_escape = @{
    (!"\\" ~ !newline ~ !PEEK ~ ANY)+
}
// Body of a triple-quoted string: same pieces as the short form, but built on
// a literal-run rule that permits raw newlines.
long_string_body = ${
    (
        long_string_non_escape
      | line_continuation_seq
      | string_escape_seq
      | string_unknown_escape
    )*
}
// One or more characters, none of which is a backslash or starts the
// delimiter currently on top of the stack. Newlines are allowed here.
long_string_non_escape = @{
    (!"\\" ~ !PEEK ~ ANY)+
}
// A recognized string escape: a backslash followed by one of the known forms.
string_escape_seq = ${
    "\\" ~ (
        char_escape
      | octal_escape
      | hex_escape
      | unicode_hex_escape
      | name_escape
    )
}
// `uXXXX` (four hex digits) or `UXXXXXXXX` (eight hex digits); the leading
// backslash is consumed by string_escape_seq.
unicode_hex_escape = @{
    ("u" ~ hex_digit{4})
  | ("U" ~ hex_digit{8})
}
// `N{...}`: everything up to the first closing brace; the leading backslash
// is consumed by string_escape_seq.
name_escape = @{
    "N{" ~ (!"}" ~ ANY)* ~ "}"
}
// A backslash followed by any single character. The body rules list this
// last, so it only applies when no recognized escape matched.
string_unknown_escape = @{
    "\\" ~ ANY
}
// Bytes: b"bytes", b'bytes', b"""bytes""", b'''bytes''', B"bytes", ...
// Raw bytes are not implemented.
//
// After the `b`/`B` prefix, the opening delimiter is pushed onto the stack so
// that the body rules can use PEEK to recognize the matching terminator; POP
// then consumes it.
//
// The triple-quoted alternative must come first. Choice is ordered and a
// successful alternative is never revisited, so if the single-quote form were
// tried first, `b"""data"""` would be accepted as the empty literal `b""` and
// the remaining input would make the overall parse fail.
bytes = ${
    (("b" | "B") ~ PUSH("\"\"\"" | "'''") ~ long_bytes_body ~ POP) |
    (("b" | "B") ~ PUSH("\"" | "'") ~ short_bytes_body ~ POP)
}
// Body of a single-quoted bytes literal: any sequence of literal runs,
// backslash-newline continuations, recognized escapes and unrecognized escapes.
short_bytes_body = ${
    (
        short_bytes_non_escape
      | line_continuation_seq
      | bytes_escape_seq
      | bytes_unknown_escape
    )*
}
// One or more ASCII characters, none of which is a backslash, starts a
// newline, or starts the delimiter currently on top of the stack.
short_bytes_non_escape = @{
    (!"\\" ~ !newline ~ !PEEK ~ ascii_char)+
}
// Body of a triple-quoted bytes literal: same pieces as the short form, but
// built on a literal-run rule that permits raw newlines.
long_bytes_body = ${
    (
        long_bytes_non_escape
      | line_continuation_seq
      | bytes_escape_seq
      | bytes_unknown_escape
    )*
}
// One or more ASCII characters, none of which is a backslash or starts the
// delimiter currently on top of the stack. Newlines are allowed here.
long_bytes_non_escape = @{
    (!"\\" ~ !PEEK ~ ascii_char)+
}
// A recognized bytes escape: a backslash followed by a character, octal or
// hex escape. The unicode and named escapes of strings are not listed here.
bytes_escape_seq = ${
    "\\" ~ (
        char_escape
      | octal_escape
      | hex_escape
    )
}
// A backslash followed by any single ASCII character. The body rules list
// this last, so it only applies when no recognized escape matched.
bytes_unknown_escape = @{
    "\\" ~ ascii_char
}
// Escape sequences shared by string and bytes literals.
// A backslash immediately followed by a line break.
line_continuation_seq = @{
    "\\" ~ newline
}
// The single character that follows the backslash in a simple escape.
char_escape = @{
    "\\" | "'" | "\""
  | "a" | "b" | "f" | "n" | "r" | "t" | "v"
}
// Between one and three octal digits, matched greedily.
octal_escape = @{
    oct_digit ~ oct_digit? ~ oct_digit?
}
// `x` followed by exactly two hex digits.
hex_escape = @{
    "x" ~ hex_digit ~ hex_digit
}
// Number expressions: an optionally signed number, followed by any number of
// further numbers that are each introduced by one or more sign characters.
number_expr = {
    ("+" | minus_sign)*
    ~ number
    ~ (("+" | minus_sign)+ ~ number)*
}
// A single numeric token. Imaginary is tried before float, and float before
// integer, so that the longest form wins.
number = ${
    imag
  | float
  | integer
}
// A minus sign as its own rule; unlike "+", it shows up in the parse output.
minus_sign = @{
    "-"
}
// Integers. The prefixed forms are tried before plain decimal so that the
// leading "0" of a prefix is not consumed as a decimal integer.
integer = ${
    bin_integer
  | oct_integer
  | hex_integer
  | dec_integer
}
// `0b` / `0B` followed by binary digits, each optionally preceded by one `_`.
bin_integer = ${
    "0" ~ ^"b" ~ ("_"? ~ bin_digit)+
}
// `0o` / `0O` followed by octal digits, each optionally preceded by one `_`.
oct_integer = ${
    "0" ~ ^"o" ~ ("_"? ~ oct_digit)+
}
// `0x` / `0X` followed by hex digits, each optionally preceded by one `_`.
hex_integer = ${
    "0" ~ ^"x" ~ ("_"? ~ hex_digit)+
}
// A decimal digit followed by further digits, each optionally preceded by
// one `_`.
dec_integer = ${
    digit ~ ("_"? ~ digit)*
}
// Floats. The exponent form is tried first because it can start with a
// complete point float.
float = ${
    exponent_float
  | point_float
}
// Either an optional integer part followed by a fraction (`.5`, `1.5`), or
// an integer part followed by a bare dot (`1.`).
point_float = ${
    (digit_part? ~ fraction)
  | (digit_part ~ ".")
}
// A point float or plain digits, followed by an exponent.
exponent_float = ${
    (point_float | digit_part) ~ (pos_exponent | neg_exponent)
}
// A digit followed by further digits, each optionally preceded by one `_`.
digit_part = ${
    digit ~ ("_"? ~ digit)*
}
// A dot followed by digits.
fraction = ${
    "." ~ digit_part
}
// `e` / `E`, an optional plus sign, then digits.
pos_exponent = ${
    ^"e" ~ "+"? ~ digit_part
}
// `e` / `E`, a mandatory minus sign, then digits.
neg_exponent = ${
    ^"e" ~ "-" ~ digit_part
}
// Imaginary number: a float or plain digits followed by `j` / `J`.
imag = ${
    (float | digit_part) ~ ^"j"
}
// Tuples: either `()`, or one or more comma-terminated values optionally
// followed by a final value with no trailing comma. A lone parenthesized
// value with no comma, e.g. `(1)`, does not match.
tuple = { "(" ~ ((value ~ ",")+ ~ value?)? ~ ")" }
// Lists: zero or more comma-terminated values, then an optional final value,
// so both `[]` and a single trailing comma are accepted.
list = { "[" ~ (value ~ ",")* ~ value? ~ "]" }
// Dictionaries: same shape as lists, with `key: value` elements; `{}` matches
// here as an empty dictionary.
dict = { "{" ~ (dict_elem ~ ",")* ~ dict_elem? ~ "}" }
// A single dictionary entry: a key, a colon, and a value.
dict_elem = { value ~ ":" ~ value }
// Sets: at least one value is required (so `{}` is never a set), further
// values are comma-separated, and one trailing comma is allowed.
set = { "{" ~ value ~ ("," ~ value)* ~ ","? ~ "}" }
// Booleans: the exact keywords `True` and `False`.
boolean = @{
    "True"
  | "False"
}
// None: the exact keyword `None`.
none = @{
    "None"
}
// Character classes.
// Any code point in the 7-bit ASCII range.
ascii_char = @{
    '\x00'..'\x7f'
}
// Hexadecimal digit, either case.
hex_digit = @{
    ASCII_HEX_DIGIT
}
// Decimal digit.
digit = @{
    ASCII_DIGIT
}
// Octal digit.
oct_digit = @{
    ASCII_OCT_DIGIT
}
// Binary digit.
bin_digit = @{
    ASCII_BIN_DIGIT
}
// A line break. "\r\n" is listed before "\r" so that a CRLF pair is consumed
// as one newline.
newline = @{
    "\n"
  | "\r\n"
  | "\r"
}
// Implicit whitespace skipped between tokens of non-atomic rules: space, tab
// and form feed. Newlines are not included.
WHITESPACE = _{
    " "
  | "\t"
  | "\x0C"
}