overlay-file 1.0.0

Rust implementation of OverlayFile, used to define overlays in OCA (Overlays Capture Architecture)
Documentation
// BEGIN of GENERIC RULES

/// A single insignificant whitespace character (one space or one tab); not repeated.
ws = _{ " " | "\t" }
/// One indentation unit for an indented line: two spaces or one tab.
iws = _{ "  " | "\t" }
/// Whitespace between arguments: one or more plain whitespace characters or
/// line continuations. Comment lines and empty lines may follow a continuation.
arg_ws = _{
    (ws | (line_continuation ~ (comment_line | empty_line)*))+
}
/// A line continuation (backslash, optional whitespace, line break), allowing
/// an instruction to continue onto a new line.
line_continuation = _{ "\\" ~ ws* ~ NEWLINE }
/// A `#` comment: everything from `#` up to, but not including, the line break.
comment = @{ "#" ~ (!NEWLINE ~ ANY)* }
/// A line holding only a comment, optionally preceded by whitespace.
/// The closing line break is optional.
comment_line = _{ ws* ~ comment ~ NEWLINE? }
/// A line made of optional whitespace followed by a line break.
empty_line = @{ ws* ~ NEWLINE }

/// Keys recognised inside a meta comment.
meta_attr_key = ${ "name" | "version" | "precompiler" }
/// Value of a meta attribute: a quoted string, or a run of one or more `char`s.
meta_attr_value = ${ string | char+ }
/// A `key=value` pair; argument whitespace is allowed on either side of `=`.
meta_key_pair = @{
    meta_attr_key ~ arg_ws? ~ "=" ~ arg_ws? ~ meta_attr_value
}
/// A meta comment: `--`, optional whitespace, then zero or more key/value pairs.
/// NOTE(review): no whitespace is consumed between consecutive pairs, so a
/// second pair is only matched when it directly follows the first - confirm
/// that this is intended.
meta_comment = @{
    "--" ~ ws* ~ (!NEWLINE ~ meta_key_pair)*
}
/// A line holding only a meta comment, optionally preceded by whitespace.
/// The closing line break is optional.
meta_comment_line = _{ ws* ~ meta_comment ~ NEWLINE? }

/// A quoted string literal, either single-quoted or double-quoted.
string               = ${ single_quoted_string | double_quoted_string }
/// `'` ... `'` around `single_quoted_inner`.
single_quoted_string = _{ "'" ~ single_quoted_inner ~ "'" }
/// Content of a single-quoted string: a run of characters other than `'`,
/// backslash, U+0000 and U+001F, optionally followed by an escape and more
/// content (the rule recurses after each escape).
/// NOTE(review): only the two code points U+0000 and U+001F are excluded, not
/// the range between them - confirm whether a range was intended.
single_quoted_inner  = @{ (!("'" | "\\" | "\u{0000}" | "\u{001F}") ~ ANY)* ~ (escape ~ single_quoted_inner)? }
/// `"` ... `"` around `inner`.
double_quoted_string = _{ "\"" ~ inner ~ "\"" }
/// Content of a double-quoted string: same shape as `single_quoted_inner`,
/// but the excluded quote character is `"`.
inner                = @{ (!("\"" | "\\" | "\u{0000}" | "\u{001F}") ~ ANY)* ~ (escape ~ inner)? }
/// A backslash optionally followed by one of `b t n f r " \ '`, a `unicode`
/// escape, or a line break. Because the group is optional, a lone backslash
/// also matches.
escape               = @{ "\\" ~ ("b" | "t" | "n" | "f" | "r" | "\"" | "\\" | "'" | unicode | NEWLINE)? }
/// Body of a unicode escape: `u` + 4 hex digits, or `U` + 8 hex digits.
unicode              = @{ "u" ~ ASCII_HEX_DIGIT{4} | "U" ~ ASCII_HEX_DIGIT{8} }

/// Matches one character belonging to any of the listed Unicode script
/// property rules (pest built-ins). Alternatives are grouped by initial letter.
SCRIPTS = {
    ADLAM | AHOM | ANATOLIAN_HIEROGLYPHS | ARABIC | ARMENIAN | AVESTAN
  | BALINESE | BAMUM | BASSA_VAH | BATAK | BENGALI | BHAIKSUKI | BOPOMOFO
  | BRAHMI | BRAILLE | BUGINESE | BUHID
  | CANADIAN_ABORIGINAL | CARIAN | CAUCASIAN_ALBANIAN | CHAKMA | CHAM
  | CHEROKEE | CHORASMIAN | COPTIC | CUNEIFORM | CYPRIOT | CYPRO_MINOAN
  | CYRILLIC
  | DESERET | DEVANAGARI | DIVES_AKURU | DOGRA | DUPLOYAN
  | EGYPTIAN_HIEROGLYPHS | ELBASAN | ELYMAIC | ETHIOPIC
  | GEORGIAN | GLAGOLITIC | GOTHIC | GRANTHA | GREEK | GUJARATI
  | GUNJALA_GONDI | GURMUKHI
  | HAN | HANGUL | HANIFI_ROHINGYA | HANUNOO | HATRAN | HEBREW | HIRAGANA
  | IMPERIAL_ARAMAIC | INHERITED | INSCRIPTIONAL_PAHLAVI
  | INSCRIPTIONAL_PARTHIAN
  | JAVANESE
  | KAITHI | KANNADA | KATAKANA | KAWI | KAYAH_LI | KHAROSHTHI
  | KHITAN_SMALL_SCRIPT | KHMER | KHOJKI | KHUDAWADI
  | LAO | LATIN | LEPCHA | LIMBU | LINEAR_A | LINEAR_B | LISU | LYCIAN
  | LYDIAN
  | MAHAJANI | MAKASAR | MALAYALAM | MANDAIC | MANICHAEAN | MARCHEN
  | MASARAM_GONDI | MEDEFAIDRIN | MEETEI_MAYEK | MENDE_KIKAKUI
  | MEROITIC_CURSIVE | MEROITIC_HIEROGLYPHS | MIAO | MODI | MONGOLIAN
  | MRO | MULTANI | MYANMAR
  | NABATAEAN | NAG_MUNDARI | NANDINAGARI | NEW_TAI_LUE | NEWA | NKO
  | NUSHU | NYIAKENG_PUACHUE_HMONG
  | OGHAM | OL_CHIKI | OLD_HUNGARIAN | OLD_ITALIC | OLD_NORTH_ARABIAN
  | OLD_PERMIC | OLD_PERSIAN | OLD_SOGDIAN | OLD_SOUTH_ARABIAN
  | OLD_TURKIC | OLD_UYGHUR | ORIYA | OSAGE | OSMANYA
  | PAHAWH_HMONG | PALMYRENE | PAU_CIN_HAU | PHAGS_PA | PHOENICIAN
  | PSALTER_PAHLAVI
  | REJANG | RUNIC
  | SAMARITAN | SAURASHTRA | SHARADA | SHAVIAN | SIDDHAM | SIGNWRITING
  | SINHALA | SOGDIAN | SORA_SOMPENG | SOYOMBO | SUNDANESE | SYLOTI_NAGRI
  | SYRIAC
  | TAGALOG | TAGBANWA | TAI_LE | TAI_THAM | TAI_VIET | TAKRI | TAMIL
  | TANGSA | TANGUT | TELUGU | THAANA | THAI | TIBETAN | TIFINAGH
  | TIRHUTA | TOTO
  | UGARITIC
  | VAI | VITHKUQI
  | WANCHO | WARANG_CITI
  | YEZIDI | YI
  | ZANABAZAR_SQUARE
}
/// One character allowed in an unquoted meta attribute value: a letter, a
/// number, one of `.` `-` `_` `/` `:`, or a character from a listed script.
char = {
    LETTER
  | NUMBER
  | "." | "-" | "_" | "/" | ":"
  | SCRIPTS
}

/// Entry rule: the whole input, from start to end, as a sequence of empty
/// lines, meta comment lines, comment lines and overlay blocks.
file = { SOI ~ (empty_line | meta_comment_line | comment_line | overlay_block)* ~ EOI }

/// One overlay definition: a header line followed by its body.
overlay_block = { overlay_header ~ overlay_body }
/// Header line `ADD OVERLAY <name>`; the keyword is matched case-insensitively
/// and the line must end with a line break.
overlay_header = _{
    ^"ADD OVERLAY" ~ ws+ ~ overlay_name ~ ws* ~ NEWLINE
}
/// Overlay name including namespace: ASCII letters, digits, `:` and `_`.
overlay_name = @{ (ASCII_ALPHANUMERIC | ":" | "_")+ }
/// Body of an overlay: a version line, an optional unique-keys command, then
/// at least one entry.
overlay_body = _{
    overlay_version ~ unique_keys_command? ~ overlay_entry+
}
/// One or more object, array or attributes definitions.
overlay_entry = _{ (overlay_object | overlay_array | overlay_attributes)+ }
/// The version of the overlay: an optionally indented `VERSION <version>` line
/// that must end with a line break.
overlay_version = _{
    iws* ~ ^"VERSION" ~ ws+ ~ version ~ ws* ~ NEWLINE
}

/// Optional command naming the attribute(s) that uniquely identify the overlay:
/// `UNIQUE KEYS <attr>` or `UNIQUE KEYS [<attr>, <attr>]` (keywords are matched
/// case-insensitively). Trailing whitespace before the line break is accepted,
/// as on the other command lines; without it a trailing space or tab made the
/// following entry fail to match.
unique_keys_command = { iws* ~ ^"unique" ~ arg_ws? ~ ^"keys" ~ arg_ws? ~ (unique_key | unique_keys) ~ ws* ~ NEWLINE* }
/// A single attribute name that is part of the overlay's unique key.
unique_key = { attr_name }
/// List of attributes which uniquely identify the overlay: a bracketed,
/// comma-separated list of `unique_key`s with optional argument whitespace
/// around the brackets and commas.
unique_keys = { "[" ~ arg_ws* ~ unique_key ~ (arg_ws* ~ "," ~ arg_ws* ~ unique_key)* ~ arg_ws* ~ "]" }

/// An object definition: the `ADD OBJECT` header followed by its key and
/// value type lines.
overlay_object = { overlay_object_header ~ overlay_object_body }
/// Optionally indented `ADD OBJECT <attr_name>`; any number of line breaks
/// (including none) may follow.
overlay_object_header = {
    iws* ~ ^"ADD OBJECT" ~ ws+ ~ attr_name ~ ws* ~ NEWLINE*
}
/// Two lines: `WITH KEYS <key_type>`, which must end with a line break, then
/// `WITH VALUES <value_type>`, after which line breaks are optional.
overlay_object_body = {
    iws* ~ ^"WITH KEYS"   ~ ws+ ~ key_type   ~ ws* ~ NEWLINE ~
    iws* ~ ^"WITH VALUES" ~ ws+ ~ value_type ~ ws* ~ NEWLINE*
}

/// An array definition: the `ADD ARRAY` header followed by its value type line.
overlay_array = { overlay_array_header ~ overlay_array_body }
/// Optionally indented `ADD ARRAY <attr_name>`; any number of line breaks
/// (including none) may follow.
overlay_array_header = {
    iws* ~ ^"ADD ARRAY" ~ ws+ ~ attr_name ~ ws* ~ NEWLINE*
}
/// Optionally indented `WITH VALUES <value_type>` line.
overlay_array_body = {
    iws* ~ ^"WITH VALUES" ~ ws+ ~ value_type ~ ws* ~ NEWLINE*
}


/// `ADD ATTRIBUTES` followed either by one or more `name = type` pairs or by
/// an attribute array (`ATTR_ARRAY`). Pairs are tried first.
overlay_attributes = {
    iws* ~ ^"ADD ATTRIBUTES" ~ ws* ~ (key_pair+ | ATTR_ARRAY)
}
/// `WITH VALUES <value_type>` line that closes an attribute array.
/// Whitespace between the keyword and the type is optional here (`ws*`).
keys_with_values = {
    iws* ~ ^"WITH VALUES" ~ ws* ~ value_type ~ ws* ~ NEWLINE*
}
/// One `name = type` pair; whitespace around the name, `=` and type is
/// optional, and any number of line breaks may follow.
key_pair = {
    ws* ~ attr_name ~ ws* ~ "=" ~ ws* ~ attr_value_type ~ ws* ~ NEWLINE*
}
/// Type on the right-hand side of a `key_pair`. The quoted names are matched
/// case-sensitively.
attr_value_type = {
    "Text" | "Binary" | "Number" | "Boolean" | "DateTime" | "Array"
  | LANG_TYPE
  | ANY_TYPE
}

/// Attribute name: one or more ASCII letters, digits, `-`, `_` or `.`.
attr_name = ${
    (ASCII_ALPHANUMERIC | "-" | "_" | ".")+
}
/// Version number: three groups of ASCII digits separated by dots, e.g. `1.0.0`.
version = @{
    ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+
}



/// Type of the keys allowed for a given object:
/// - "attr-names" - attribute from the capture base,
/// - "Text" - any string,
/// - "array" - array of strings, either fixed or ending with an ellipsis that
///   allows additional strings
key_type = {
    ATTR_NAMES_TYPE
  | TEXT_TYPE
  | ARRAY_KEY_TYPE
}
/// Keyword `attr-names`, matched case-insensitively.
ATTR_NAMES_TYPE = @{ ^"attr-names" }
/// Keyword `Text`, matched case-insensitively.
TEXT_TYPE = @{ ^"Text" }
/// Keyword `REF`, matched case-insensitively.
REF_TYPE = @{ ^"REF" }
/// Don't validate the value of the key, just allow any type
/// (keyword `Any`, matched case-insensitively).
ANY_TYPE = @{ ^"Any" }
/// ISO 639-1 (+country code) or 639-3 language code
/// (keyword `Lang`, matched case-insensitively).
LANG_TYPE = @{ ^"Lang" }
/// Array form of an attributes definition: a bracketed key array, one or more
/// line breaks, then a `WITH VALUES` line. Whitespace between the closing `]`
/// and the line break is accepted; previously a trailing space or tab there
/// made the whole definition fail to match.
ATTR_ARRAY = { ARRAY_KEY_TYPE ~ ws* ~ NEWLINE+ ~ keys_with_values }

/// `[` ... `]` around the array content, with optional argument whitespace
/// just inside the brackets.
ARRAY_KEY_TYPE = { "[" ~ arg_ws* ~ array_content ~ arg_ws* ~ "]" }
/// Either a list of items, optionally followed by `, ...`, or a lone `...`.
array_content = { (array_items ~ (arg_ws* ~ "," ~ arg_ws* ~ trailing_ellipsis)?) | trailing_ellipsis }
/// One or more comma-separated key items.
array_items = { key_item ~ (arg_ws* ~ "," ~ arg_ws* ~ key_item)* }
/// The literal `...`; only accepted as the last element of the array.
trailing_ellipsis = { "..." }
/// An attribute name that does not start with `...`. The lookahead is needed
/// because `.` is itself a valid attribute-name character.
key_item = { !("...") ~ attr_name }


/// Type accepted after `WITH VALUES`. Alternatives are tried in order: the
/// `|`-separated union (`complex_value_type`) first, then each single type.
value_type = {
    complex_value_type
  | ATTR_NAMES_TYPE
  | ANY_TYPE
  | LANG_TYPE
  | TEXT_TYPE
  | array_type
  | object_type
  | REF_TYPE
}
/// Nested object type: `OBJECT`, a line break, then the `WITH KEYS` and
/// `WITH VALUES` lines. Whitespace between the keyword and the line break is
/// accepted, as on the other line-terminated rules.
object_type = { ^"OBJECT" ~ ws* ~ NEWLINE ~ overlay_object_body }
/// Nested array type: `ARRAY`, a line break, then a `WITH VALUES` line.
/// Whitespace between the keyword and the line break is accepted.
array_type = { ^"ARRAY" ~ ws* ~ NEWLINE ~ overlay_array_type_body }
/// Optionally indented `WITH VALUES <value_type>` line giving the element
/// type of a nested array.
overlay_array_type_body = {
    iws* ~ ^"WITH VALUES" ~ ws+ ~ value_type ~ ws* ~ NEWLINE*
}

/// A union of two or more types joined by `|`.
complex_value_type = { complex_type ~ ("|" ~ complex_type)+ }
/// Types that may take part in a union, tried in this order.
complex_type = _{ array_type | object_type | REF_TYPE | TEXT_TYPE | LANG_TYPE }