annatto 0.52.0

Converts between linguistic data formats, using the graphANNIS data model as an intermediate representation, and can apply consistency tests.
Documentation
// `WHITESPACE` is the rule name pest reserves for implicit whitespace skipping.
// This grammar deliberately does not rely on implicit skipping, because it makes
// parsing less predictable and debugging harder; the rule is only referenced
// explicitly, e.g. in the lookaheads of `char` and `no_value`.
WHITESPACE = _{ "\t" }
// Explicit column separator used between the fields of a token line: one tab.
whitespace = _{ "\t" }
// Start of input followed by any number of empty lines.
initial_padding = _{ SOI ~ NEWLINE* }
// Any number of empty lines followed by the end of input.
final_padding = _{ NEWLINE* ~ EOI }
// Whole-document rule, anchored at the start and end of input: one or more
// sentences with optional empty lines before and after.
conllu = ${
    initial_padding
    ~ sentence+
    ~ final_padding
}

// A sentence: any number of `#` lines, then at least one token line (multi-word
// token range, empty node or regular token), then an optional extra newline
// (the empty line that separates sentences).
sentence = ${
    (text_structure | s_anno | s_comment)*
    ~ (multi_token | empty_node | token)+
    ~ NEWLINE?
}
// Sentence-level annotation line of the form `# name = value`; the spaces
// around the name and the `=` are optional.
s_anno = ${
    hash ~ space*
    ~ name ~ space*
    ~ equals ~ space*
    ~ s_anno_value
    ~ NEWLINE
}
// Annotation value: the remainder of the line; tabs are accepted here.
s_anno_value = @{ (char | WHITESPACE)+ }

// Free-text comment line. `comment_char` excludes `=`, so a `#` line that
// contains `=` can only be matched by `s_anno` or `text_structure`.
s_comment = ${
    hash ~ space*
    ~ comment
    ~ NEWLINE
}
comment = @{ comment_char+ }
// Structural marker lines: `# newdoc ...` and `# newpar`.
text_structure = {
    hash ~ space*
    ~ (newdoc | newpar)
    ~ NEWLINE
}
// The keyword `newdoc`, optionally followed by further text on the same line.
// `char` already matches a plain space, so no separate `space` alternative is
// needed.
newdoc = { "newdoc" ~ char* }
// The bare keyword `newpar`; `text_structure` requires the line to end right
// after it.
newpar = { "newpar" }

// Regular token line: ten tab-separated columns. Every column except the ID
// may hold the placeholder `_` (`no_value`) instead of a value. The closing
// newline is optional so that the last line of the input may be unterminated.
token = ${
    id
    ~ whitespace ~ (no_value | form)
    ~ whitespace ~ (no_value | lemma)
    ~ whitespace ~ (no_value | upos)
    ~ whitespace ~ (no_value | xpos)
    ~ whitespace ~ (no_value | features)
    ~ whitespace ~ (no_value | head)
    ~ whitespace ~ (no_value | deprel)
    ~ whitespace ~ (no_value | enhanced_deps)
    ~ whitespace ~ (no_value | misc)
    ~ NEWLINE?
}
// Multi-word token line: a range ID (`start-end`) and a form; the remaining
// eight columns must all be the placeholder `_`.
// NOTE(review): the CoNLL-U format appears to allow a non-empty MISC column
// (e.g. `SpaceAfter=No`) on such lines; this rule rejects that - confirm
// whether this is intended.
multi_token = ${
    multi_id
    ~ whitespace ~ form
    ~ whitespace ~ no_value // lemma
    ~ whitespace ~ no_value // upos
    ~ whitespace ~ no_value // xpos
    ~ whitespace ~ no_value // features
    ~ whitespace ~ no_value // head
    ~ whitespace ~ no_value // deprel
    ~ whitespace ~ no_value // enhanced dependencies
    ~ whitespace ~ no_value // misc
    ~ NEWLINE?
}
// Empty node line: a decimal ID (`major.minor`) and a mandatory form. Lemma,
// part-of-speech tags, features, enhanced dependencies and misc are optional;
// the head and deprel columns must be the placeholder `_`.
empty_node = ${
    empty_id
    ~ whitespace ~ form
    ~ whitespace ~ (no_value | lemma)
    ~ whitespace ~ (no_value | upos)
    ~ whitespace ~ (no_value | xpos)
    ~ whitespace ~ (no_value | features)
    ~ whitespace ~ no_value // head
    ~ whitespace ~ no_value // deprel
    ~ whitespace ~ (no_value | enhanced_deps)
    ~ whitespace ~ (no_value | misc)
    ~ NEWLINE?
}
// ID of a regular token line: a run of ASCII digits.
id = @{ ASCII_DIGIT+ }

// ID of a multi-word token line: `start-end`.
multi_id = ${ start_id ~ to ~ end_id }
start_id = @{ ASCII_DIGIT+ }
end_id = @{ ASCII_DIGIT+ }

// ID of an empty node line: `major.minor`.
empty_id = ${ major_id ~ sub_delim ~ minor_id }
major_id = @{ ASCII_DIGIT+ }
minor_id = @{ ASCII_DIGIT+ }
// Free-text columns: one or more characters, ending at the next tab or newline
// (see `char`). Spaces are allowed inside these values.
form = ${ char+ }
lemma = ${ char+ }
upos = ${ char+ }
xpos = ${ char+ }
// Features column: one or more `name=value` pairs separated by `|`.
features = ${ feature ~ (delim ~ feature)* }
feature = ${ name ~ equals ~ value }
// Both sides of a pair are built from `feat_char`, so neither may contain a
// tab, space, newline, `=` or `|`. `name` is also used by `s_anno`.
name = @{ feat_char+ }
value = @{ feat_char+ }
// Head column: a run of ASCII digits.
head = @{ ASCII_DIGIT+ }
// Dependency relation column: free text up to the next tab or newline, so
// colons (relation subtypes) are accepted here.
deprel = ${ char+ }
// Enhanced dependencies column: one or more `head:relation` pairs separated
// by `|`.
enhanced_deps = ${ enhanced_dep ~ (delim ~ enhanced_dep)* }
// The first colon separates the head from the relation.
enhanced_dep = ${ enhanced_head ~ colon ~ enhanced_rel }
// NOTE(review): only integer heads are accepted; a reference to an empty node
// (e.g. `8.1:nsubj`) does not parse - confirm whether that is intended.
enhanced_head = ${ ASCII_DIGIT+ }
// Relation label. Further colons are allowed inside the label so that subtyped
// relations such as `nmod:poss` or `conj:and` parse, matching what `deprel`
// accepts. Each colon must be followed by at least one label character, so a
// trailing colon is still rejected.
enhanced_rel = @{ enhanced_char+ ~ (colon ~ enhanced_char+)* }
// Misc column: same syntax as `features`, i.e. `name=value` pairs separated
// by `|`. A value without `=` is not accepted by this rule.
misc = ${ feature ~ ( delim ~ feature )* }

// Character classes. Each one consumes a single character (`ANY`) unless the
// negative lookahead matches at the current position.

// Any character except a tab or a newline.
char = _{ !(WHITESPACE | NEWLINE | EOI) ~ ANY }
// Any character except a newline, tab, space, `=` or `|`.
feat_char = { !(NEWLINE | WHITESPACE | space | equals | delim ) ~ ANY }
// Any character except a newline or `=`.
comment_char = { !(NEWLINE | equals ) ~ ANY }
// Any character except `:`, `|`, a tab or a newline.
enhanced_char = _{ !(colon | delim | WHITESPACE | NEWLINE ) ~ ANY }
// Silent single-character literals used throughout the grammar.

// Separators inside column values.
delim = _{ "|" }
equals = _{ "=" }
colon = _{ ":" }

// Separators inside IDs: `-` in multi-word token ranges, `.` in empty node IDs.
to = _{ "-" }
sub_delim = _{ "." }

// Comment line marker and the plain space.
hash = _{ "#" }
space = _{ " " }
// Placeholder for an empty column: a single `_`. The lookahead requires a tab,
// a newline or the end of input right after it (without consuming it), so a
// value that merely starts with `_` is not treated as empty.
no_value = @{ "_" ~ &( WHITESPACE | NEWLINE | EOI ) }