// py_literal 0.4.0
//
// Read and write Python literals.
// Documentation
// Entry rule: exactly one `value` spanning the whole input, anchored at the
// start (SOI) and end (EOI). This is a normal (non-atomic) rule, so implicit
// WHITESPACE is permitted before and after the value.
start = { SOI ~ value ~ EOI }

// Python literal.
//
// Alternatives are tried in the order written (PEG ordered choice). `dict` is
// tried before `set`, so `{}` is matched by `dict` (which accepts zero
// elements); `set` requires at least one value.
value = { string | bytes | number_expr | tuple | list | dict | set | boolean | none }

// Strings: "string", 'string', """string""", and '''string'''.
// Raw strings are not implemented.
//
// The opening delimiter is PUSHed onto the stack so that the body rules can
// PEEK at it (to know which character(s) terminate the string), and the
// closing delimiter is matched with POP.
//
// The triple-quoted alternative MUST come first. PEG choice is ordered: if
// the single-quote alternative were tried first, the input `"""abc"""` would
// match its first two quote characters as an empty short string and the
// remainder would then fail to parse, making long strings unparseable.
string = ${
    (PUSH("\"\"\"" | "'''") ~ long_string_body ~ POP) |
    (PUSH("\"" | "'") ~ short_string_body ~ POP)
}
// Body of a single-quoted string: zero or more literal runs, line
// continuations, recognized escapes, or unrecognized escapes.
// Order matters: `line_continuation_seq` and `string_escape_seq` must be
// tried before `string_unknown_escape`, which accepts a backslash followed by
// any character.
short_string_body = ${
    (short_string_non_escape | line_continuation_seq | string_escape_seq | string_unknown_escape)*
}
// One or more characters, none of which starts a backslash, a newline, or the
// delimiter currently on top of the stack (PEEK).
short_string_non_escape = @{
    (!("\\" | newline | PEEK) ~ ANY)+
}
// Body of a triple-quoted string: zero or more literal runs, line
// continuations, recognized escapes, or unrecognized escapes.
// Order matters: `string_unknown_escape` is the catch-all for any backslash
// pair and therefore must stay last.
long_string_body = ${
    (long_string_non_escape | line_continuation_seq | string_escape_seq | string_unknown_escape)*
}
// One or more characters, none of which starts a backslash or the delimiter
// currently on top of the stack (PEEK). Unlike the short form, newlines are
// allowed here.
long_string_non_escape = @{
    (!("\\" | PEEK) ~ ANY)+
}
// Recognized string escape: a backslash followed by one of the known forms.
// The sub-rules below match only the part AFTER the backslash.
string_escape_seq = ${
    "\\" ~ (char_escape | octal_escape | hex_escape | unicode_hex_escape | name_escape)
}
// `u` plus exactly four hex digits, or `U` plus exactly eight.
unicode_hex_escape = @{ ("u" ~ hex_digit{4}) | ("U" ~ hex_digit{8}) }
// `N{...}`: everything up to the first closing brace. The content between
// the braces is not validated by the grammar.
name_escape = @{ "N{" ~ (!"}" ~ ANY)* ~ "}" }
// Catch-all: a backslash followed by any single character. Used by the string
// body rules after the recognized escape forms have been tried.
string_unknown_escape = @{ "\\" ~ ANY }

// Bytes: b"bytes", b'bytes', b"""bytes""", b'''bytes''', B"bytes", ...
// Raw bytes are not implemented.
//
// After the `b`/`B` prefix, the opening delimiter is PUSHed onto the stack so
// that the body rules can PEEK at it, and the closing delimiter is matched
// with POP.
//
// The triple-quoted alternative MUST come first. PEG choice is ordered: if
// the single-quote alternative were tried first, `b"""abc"""` would match
// `b""` as an empty short bytes literal and the remainder would then fail to
// parse, making long bytes literals unparseable.
bytes = ${
    (("b" | "B") ~ PUSH("\"\"\"" | "'''") ~ long_bytes_body ~ POP) |
    (("b" | "B") ~ PUSH("\"" | "'") ~ short_bytes_body ~ POP)
}
// Body of a single-quoted bytes literal: zero or more literal runs, line
// continuations, recognized escapes, or unrecognized escapes.
// Order matters: `bytes_unknown_escape` accepts a backslash followed by any
// ASCII character and therefore must stay last.
short_bytes_body = ${
    (short_bytes_non_escape | line_continuation_seq | bytes_escape_seq | bytes_unknown_escape)*
}
// One or more ASCII characters, none of which starts a backslash, a newline,
// or the delimiter currently on top of the stack (PEEK).
short_bytes_non_escape = @{
    (!("\\" | newline | PEEK) ~ ascii_char)+
}
// Body of a triple-quoted bytes literal: zero or more literal runs, line
// continuations, recognized escapes, or unrecognized escapes.
// Order matters: `bytes_unknown_escape` is the catch-all and must stay last.
long_bytes_body = ${
    (long_bytes_non_escape | line_continuation_seq | bytes_escape_seq | bytes_unknown_escape)*
}
// One or more ASCII characters, none of which starts a backslash or the
// delimiter currently on top of the stack (PEEK). Newlines are allowed.
long_bytes_non_escape = @{
    (!("\\" | PEEK) ~ ascii_char)+
}
// Recognized bytes escape: a backslash followed by a character, octal, or hex
// escape. The Unicode (`u`/`U`) and named (`N{...}`) forms accepted by
// `string_escape_seq` are not included here.
bytes_escape_seq = ${ "\\" ~ (char_escape | octal_escape | hex_escape) }
// Catch-all: a backslash followed by any single ASCII character.
bytes_unknown_escape = @{ "\\" ~ ascii_char }

// Escape sequences common to strings and bytes.
// Backslash immediately followed by a newline.
line_continuation_seq = @{ "\\" ~ newline }
// Single-character escapes. Matches only the character AFTER the backslash;
// the backslash itself is consumed by the enclosing *_escape_seq rule.
char_escape = @{ "\\" | "'" | "\"" | "a" | "b" | "f" | "n" | "r" | "t" | "v" }
// One to three octal digits (after the backslash).
octal_escape = @{ oct_digit{1, 3} }
// `x` followed by exactly two hex digits (after the backslash).
hex_escape = @{ "x" ~ hex_digit{2} }

// Number expressions.
// Zero or more leading signs, a number, then any number of
// (one-or-more signs, number) pairs — presumably to cover complex literals
// such as `1+2j`; confirm against the consuming code.
// `-` is matched through the `minus_sign` rule and therefore yields a token;
// `+` is a bare literal and yields none.
number_expr = { ("+" | minus_sign)* ~ number ~ (("+" | minus_sign)+ ~ number)* }
// Order matters: `imag` may begin with a float or digit run, and `float` may
// begin with a digit run, so the longer forms must be tried first.
number = ${ imag | float | integer }
// A single `-`, as its own rule so that it appears in the parse output.
minus_sign = @{ "-" }

// Integers.
// The prefixed forms are tried before `dec_integer`, which would otherwise
// match just the leading "0" of e.g. `0x1f`.
integer = ${ bin_integer | oct_integer | hex_integer | dec_integer }
// Binary: `0b`/`0B`, then one or more digits, each optionally preceded by a
// single underscore.
bin_integer = ${ ^"0b" ~ ("_"? ~ bin_digit)+ }
// Octal: `0o`/`0O`, then one or more digits, each optionally preceded by a
// single underscore.
oct_integer = ${ ^"0o" ~ ("_"? ~ oct_digit)+ }
// Hexadecimal: `0x`/`0X`, then one or more digits, each optionally preceded
// by a single underscore.
hex_integer = ${ ^"0x" ~ ("_"? ~ hex_digit)+ }
// Decimal: a digit, then further digits each optionally preceded by a single
// underscore.
dec_integer = ${ digit ~ ("_"? ~ digit)* }

// Floats.
// `exponent_float` is tried first because it may begin with a `point_float`.
float = ${ exponent_float | point_float }
// Either an optional digit run followed by a fraction (`1.5`, `.5`), or a
// digit run followed by a bare point (`1.`).
point_float = ${ (digit_part? ~ fraction) | (digit_part ~ ".") }
// A point float or plain digit run, followed by an exponent.
exponent_float = ${ (point_float | digit_part) ~ (pos_exponent | neg_exponent) }
// A digit, then further digits each optionally preceded by a single
// underscore.
digit_part = ${ digit ~ ("_"? ~ digit)* }
// A point followed by a digit run.
fraction = ${ "." ~ digit_part }
// `e`/`E`, an optional `+`, and a digit run.
pos_exponent = ${ ^"e" ~ "+"? ~ digit_part }
// `e`/`E`, a mandatory `-`, and a digit run.
neg_exponent = ${ ^"e" ~ "-" ~ digit_part }

// Imaginary number: a float or plain digit run with a `j`/`J` suffix.
// `float` is tried before `digit_part` so that the longer form wins.
imag = ${ (float | digit_part) ~ ^"j" }

// Tuples.
// Either empty `()`, or one or more values each followed by a comma, then an
// optional final value without a trailing comma. A single parenthesized value
// with no comma, e.g. `(1)`, does not match this rule.
tuple = { "(" ~ ((value ~ ",")+ ~ value?)? ~ ")" }

// Lists.
// Zero or more comma-terminated values followed by an optional final value,
// so `[]`, `[1]`, `[1,]`, and `[1, 2]` all match.
list = { "[" ~ (value ~ ",")* ~ value? ~ "]" }

// Dictionaries.
// Zero or more comma-terminated elements followed by an optional final
// element; `{}` matches as an empty dict.
dict = { "{" ~ (dict_elem ~ ",")* ~ dict_elem? ~ "}" }
// A single `key: value` pair; both sides are arbitrary values.
dict_elem = { value ~ ":" ~ value }

// Sets.
// At least one value, further comma-separated values, and an optional
// trailing comma. An empty `{}` does not match this rule.
set = { "{" ~ value ~ ("," ~ value)* ~ ","? ~ "}" }

// Booleans: the exact, case-sensitive keywords `True` and `False`.
boolean = @{ "False" | "True" }

// None: the exact, case-sensitive keyword `None`.
none = @{ "None" }

// Character classes.
// Any code point in the 7-bit ASCII range.
ascii_char = @{ '\x00'..'\x7f' }
// 0-9, a-f, A-F.
hex_digit = @{ ASCII_HEX_DIGIT }
// 0-9.
digit = @{ ASCII_DIGIT }
// 0-7.
oct_digit = @{ ASCII_OCT_DIGIT }
// 0 or 1.
bin_digit = @{ ASCII_BIN_DIGIT }
// A single line break: LF, CRLF, or a lone CR.
newline = @{ NEWLINE }
// Implicit whitespace skipped between tokens of non-atomic rules. Silent, so
// it produces no tokens. Newlines are deliberately not listed here.
WHITESPACE = _{
    " "       // space
    | "\t"    // horizontal tab
    | "\x0C"  // form feed
}