//! A PEG copy of the OBO format 1.4 syntax.
//!
//! # See also
//!
//! - [OBO Flat File Format 1.4 syntax](http://purl.obolibrary.org/obo/oboformat/spec.html)
//! - [IRI syntax (IETF RFC 3987)](https://tools.ietf.org/html/rfc3987#section-2.2)
// Implicit whitespace rule: defined as a silent (`_`) alias of
// `WhitespaceChar`, so it never shows up in the parse tree.
WHITESPACE = _{ WhitespaceChar }
// 2.1 BNF Notation
// Boolean literal used by flag-like clauses.
Boolean                                = @{ "true" | "false" }

// Clause tags. Each rule is atomic and matches the tag text together with
// its trailing colon, so tags sharing a prefix (e.g. `is_a:` and
// `is_anonymous:`) cannot be mistaken for one another.
AltIdTag                               = @{ "alt_id:" }
AutoGeneratedByTag                     = @{ "auto-generated-by:" }
BuiltinTag                             = @{ "builtin:" }
CommentTag                             = @{ "comment:" }
ConsiderTag                            = @{ "consider:" }
CreatedByTag                           = @{ "created_by:" }
CreationDateTag                        = @{ "creation_date:" }
DataVersionTag                         = @{ "data-version:" }
DateTag                                = @{ "date:" }
DisjointFromTag                        = @{ "disjoint_from:" }
DisjointOverTag                        = @{ "disjoint_over:" }
DefTag                                 = @{ "def:" }
DefaultNamespaceTag                    = @{ "default-namespace:" }
DomainTag                              = @{ "domain:" }
EquivalentToTag                        = @{ "equivalent_to:" }
EquivalentToChainTag                   = @{ "equivalent_to_chain:" }
ExpandAssertionToTag                   = @{ "expand_assertion_to:" }
ExpandExpressionToTag                  = @{ "expand_expression_to:" }
FormatVersionTag                       = @{ "format-version:" }
HoldsOverChainTag                      = @{ "holds_over_chain:" }
IdspaceTag                             = @{ "idspace:" }
ImportTag                              = @{ "import:" }
InstanceOfTag                          = @{ "instance_of:" }
IntersectionOfTag                      = @{ "intersection_of:" }
InverseOfTag                           = @{ "inverse_of:" }
IsATag                                 = @{ "is_a:" }
IsAnonymousTag                         = @{ "is_anonymous:" }
IsAntiSymmetricTag                     = @{ "is_anti_symmetric:" }
IsAsymmetricTag                        = @{ "is_asymmetric:" }
IsClassLevelTag                        = @{ "is_class_level:" }
IsCyclicTag                            = @{ "is_cyclic:" }
IsFunctionalTag                        = @{ "is_functional:" }
IsInverseFunctionalTag                 = @{ "is_inverse_functional:" }
IsMetadataTagTag                       = @{ "is_metadata_tag:" }
IsObsoleteTag                          = @{ "is_obsolete:" }
IsReflexiveTag                         = @{ "is_reflexive:" }
IsSymmetricTag                         = @{ "is_symmetric:" }
IsTransitiveTag                        = @{ "is_transitive:" }
NameTag                                = @{ "name:" }
NamespaceTag                           = @{ "namespace:" }
NamespaceIdRuleTag                     = @{ "namespace-id-rule:" }
OntologyTag                            = @{ "ontology:" }
OwlAxiomsTag                           = @{ "owl-axioms:" }
PropertyValueTag                       = @{ "property_value:" }
RelationshipTag                        = @{ "relationship:" }
RangeTag                               = @{ "range:" }
RemarkTag                              = @{ "remark:" }
ReplacedByTag                          = @{ "replaced_by:" }
SavedByTag                             = @{ "saved-by:" }
SubsetTag                              = @{ "subset:" }
SubsetdefTag                           = @{ "subsetdef:" }
SynonymTypedefTag                      = @{ "synonymtypedef:" }
SynonymTag                             = @{ "synonym:" }
TransitiveOverTag                      = @{ "transitive_over:" }
TreatXrefsAsEquivalentTag              = @{ "treat-xrefs-as-equivalent:" }
TreatXrefsAsGenusDifferentiaTag        = @{ "treat-xrefs-as-genus-differentia:" }
TreatXrefsAsHasSubclassTag             = @{ "treat-xrefs-as-has-subclass:" }
TreatXrefsAsIsATag                     = @{ "treat-xrefs-as-is_a:" }
TreatXrefsAsReverseGenusDifferentiaTag = @{ "treat-xrefs-as-reverse-genus-differentia:" }
TreatXrefsAsRelationshipTag            = @{ "treat-xrefs-as-relationship:" }
UnionOfTag                             = @{ "union_of:" }
XrefTag                                = @{ "xref:" }
// 2.2 Characters
// 2.2.0 Basic Characters
// Thin atomic wrappers around pest's built-in ASCII classes.
AlphaChar = @{ ASCII_ALPHA }   // one ASCII letter
Digit     = @{ ASCII_DIGIT }   // one ASCII decimal digit
// 2.2.1 Spacing Characters
// Horizontal blank: a space or a tab (the space is listed both literally and
// as the escape U+0020).
WhitespaceChar = _{ " " | "\t" | "\u{0020}" }
// A single line-breaking character: carriage return, line feed or form feed.
NewlineChar    = _{ "\r" | "\n" | "\u{000c}" }
// One or more blanks.
ws             = _{ WhitespaceChar+ }
// Optional trailing blanks followed by exactly one newline character.
nl             = _{ WhitespaceChar* ~ NewlineChar }
// 2.2.2 Special Characters
// Any single Unicode code point.
UniCodeChar = @{ ANY }
// One character of OBO text. Either:
//   - a backslash escape (the backslash plus the character it escapes), or
//   - any character other than a backslash, a newline character, or "!"
//     (which opens a hidden comment).
// Every `NewlineChar` is excluded, not just "\n": otherwise the "\r" of a
// CRLF line ending (or a form feed) would be captured as part of the value
// instead of terminating the line.
OboChar     = @{ ("\\" ~ UniCodeChar) | (!("\\" | NewlineChar | "!") ~ UniCodeChar) }
// An `OboChar` that is neither a blank nor a newline character.
NonWsChar   = @{ !(WhitespaceChar | NewlineChar) ~ OboChar }
// 2.3 Line Termination
// End of a clause line: optional trailing qualifier list, optional hidden
// comment, then the line break.
EOL           = { QualifierList? ~ HiddenComment? ~ nl }
// "!" followed by everything up to (not including) the next newline character.
HiddenComment = ${ "!" ~ (!NewlineChar ~ UniCodeChar)* }
// Characters allowed in a qualifier name: any non-blank character except the
// ones that delimit qualifiers (`=`, `,`, `{`, `}`, `"`).
QualifierChar = @{ !("=" | "," | "}" | "{" | "\"") ~ NonWsChar }
QualifierId   = @{ QualifierChar+ }
// `name="value"`; compound-atomic, so no blanks are skipped around the "=".
Qualifier     = ${ QualifierId ~ "=" ~ QuotedString }
// Brace-enclosed, comma-separated list holding at least one qualifier.
QualifierList = { "{" ~ Qualifier ~ ("," ~ Qualifier)* ~ "}" }
// 2.4 Clause Values
// A double-quoted value. Inside the quotes "!" is accepted literally in
// addition to any `OboChar`; an unescaped `"` closes the string.
QuotedString   = @{ "\"" ~ (!"\"" ~ ("!" | OboChar))* ~ "\"" }
// A bare value: one or more `OboChar`s.
UnquotedString = @{ OboChar+ }
// 2.5 Identifiers
// NB(@althonos): PEG choices are ordered: the first alternative that matches
//                wins, even when it consumes only part of the token. We
//                therefore sometimes add a positive lookahead predicate so
//                that an alternative fails unless it covers the whole token.
//
//                For instance, without a lookahead, `IdLocal` would parse
//                '00-01' as a `CanonicalIdLocal` ('00') and leave '-01'
//                unconsumed, whereas we want the whole of it parsed as a
//                `NonCanonicalIdLocal` with nothing left over.
// Role-specific wrappers around `Id`; they only differ in the rule name that
// ends up in the parse tree, plus a guard character for two of them.
ClassId       = { !"{" ~ Id }   // must not start with "{"
RelationId    = { Id }
InstanceId    = { Id }
SynonymTypeId = { !"[" ~ Id }   // must not start with "["
NamespaceId   = { Id }
SubsetId      = { Id }

// Any identifier. Alternatives are tried in this order: URL, prefixed
// (`prefix:local`), then unprefixed.
Id = ${
      UrlId
    | PrefixedId
    | UnprefixedId
}
// An identifier that is a URL: the `http` or `https` scheme, a colon, then
// every following non-blank character.
// "https" has to be tried before "http": PEG choice is ordered and commits to
// the first alternative that matches, so with "http" first an `https:` URL
// would match "http", fail on the ":" that must follow, and never be
// recognised as a URL.
UrlId = @{ ("https" | "http") ~ ":" ~ NonWsChar* }
// An identifier without any ":" in it.
UnprefixedId         = @{ (!":" ~ NonWsChar)+ }
// `prefix:local`, with no blanks allowed around the colon.
PrefixedId           = ${ IdPrefix ~ ":" ~ IdLocal }
IdPrefix             = ${ CanonicalIdPrefix | NonCanonicalIdPrefix }
// Letters and underscores starting with a letter; the lookahead requires the
// prefix to be directly followed by ":" or the end of input.
CanonicalIdPrefix    = @{ AlphaChar ~ (AlphaChar | "_")* ~ &(":" | EOI) }
// Fallback: any run (possibly empty) of non-blank characters other than ":".
NonCanonicalIdPrefix = @{ (!":" ~ NonWsChar)* }
IdLocal              = ${ CanonicalIdLocal | NonCanonicalIdLocal }
// Digits only; the lookahead requires the digits to run up to a blank, a
// newline character or the end of input.
CanonicalIdLocal     = @{ ASCII_DIGIT+ ~ &(EOI | WhitespaceChar | NewlineChar) }
// Fallback: any run (possibly empty) of non-blank characters.
NonCanonicalIdLocal  = @{ NonWsChar* }
// 2.6 Xref Lists
// A stand-alone cross-reference: an identifier with an optional quoted
// description.
Xref         = { Id ~ QuotedString? }
// Inside a bracketed list an xref id additionally stops at "," and "]".
XrefChar     = ${ !"," ~ !"]" ~ NonWsChar }
XrefId       = @{ XrefChar+ }
XrefListItem = { XrefId ~ QuotedString? }
// Square-bracketed, comma-separated list of xrefs; may be empty (`[]`).
XrefList     = { "[" ~ XrefListItem? ~ ("," ~ XrefListItem)* ~ "]" }
// 3 Obo Grammar
// 3.1 Obo Document Structure
// A whole document: the header frame, any number of entity frames, then the
// end of input.
OboDoc       = { HeaderFrame ~ EntityFrame* ~ EOI }
EntityFrame  = { TermFrame | InstanceFrame | TypedefFrame }
// NB(@althonos): for iterative parsers.
// Exactly one entity frame spanning the whole input.
EntitySingle = _{ EntityFrame ~ EOI }
// 3.2 Obo Headers
// The header: a sequence of lines, each holding at most one header clause
// (blank lines are accepted), optionally followed by a final clause that is
// not terminated by a newline.
HeaderFrame = { (HeaderClause? ~ nl)* ~ HeaderClause? }
// Header `date:` value: a `day:month:year` date followed by an `hour:minute`
// time. No timezone component is part of these rules.
NaiveDateTime = { NaiveDate ~ NaiveTime }
NaiveDate     = ${ NaiveDay ~ ":" ~ NaiveMonth ~ ":" ~ NaiveYear }
NaiveTime     = ${ NaiveHour ~ ":" ~ NaiveMinute }
// 01-09, 10-29, 30 or 31.
NaiveDay      = @{ ("0" ~ '1'..'9') | ('1'..'2' ~ '0'..'9') | "30" | "31" }
// 01-09 or 10-12.
NaiveMonth    = @{ ("0" ~ '1'..'9') | ("1" ~ '0'..'2') }
// Exactly four digits.
NaiveYear     = @{ Digit{4} }
// 00-19 or 20-23.
NaiveHour     = @{ ('0'..'1' ~ '0'..'9') | ("2" ~ '0'..'3') }
// 00-59.
NaiveMinute   = @{ '0'..'5' ~ '0'..'9' }
// One header clause: a tag followed by the value(s) that tag expects.
// Alternatives are tried top to bottom; the last one accepts any tag that is
// not one of the reserved tags above it.
HeaderClause = {
      FormatVersionTag                       ~ UnquotedString
    | DataVersionTag                         ~ UnquotedString
    | DateTag                                ~ NaiveDateTime
    | SavedByTag                             ~ UnquotedString
    | AutoGeneratedByTag                     ~ UnquotedString
    | ImportTag                              ~ Import
    | SubsetdefTag                           ~ SubsetId ~ QuotedString
    | SynonymTypedefTag                      ~ SynonymTypeId ~ QuotedString ~ SynonymScope?
    | DefaultNamespaceTag                    ~ NamespaceId
    | IdspaceTag                             ~ IdPrefix ~ Iri ~ QuotedString?
    | NamespaceIdRuleTag                     ~ UnquotedString
    | TreatXrefsAsEquivalentTag              ~ IdPrefix
    | TreatXrefsAsGenusDifferentiaTag        ~ IdPrefix ~ RelationId ~ ClassId
    | TreatXrefsAsReverseGenusDifferentiaTag ~ IdPrefix ~ RelationId ~ ClassId
    | TreatXrefsAsRelationshipTag            ~ IdPrefix ~ RelationId
    | TreatXrefsAsIsATag                     ~ IdPrefix
    | TreatXrefsAsHasSubclassTag             ~ IdPrefix
    // FIXME(@althonos): allow EOL
    | PropertyValueTag                       ~ PropertyValue
    | RemarkTag                              ~ UnquotedString
    | OntologyTag                            ~ UnquotedString
    | OwlAxiomsTag                           ~ UnquotedString
    | Unreserved ~ ":"                       ~ UnquotedString
}
// The tag name of a non-reserved header clause: a run of characters up to
// (not including) the first ":", provided the input at this position does
// not start with any of the reserved header tags.
Unreserved = @{
    !(
          FormatVersionTag
        | DataVersionTag
        | DateTag
        | SavedByTag
        | AutoGeneratedByTag
        | ImportTag
        | SubsetdefTag
        | SynonymTypedefTag
        | DefaultNamespaceTag
        | IdspaceTag
        | NamespaceIdRuleTag
        | TreatXrefsAsEquivalentTag
        | TreatXrefsAsGenusDifferentiaTag
        | TreatXrefsAsReverseGenusDifferentiaTag
        | TreatXrefsAsRelationshipTag
        | TreatXrefsAsIsATag
        | TreatXrefsAsHasSubclassTag
        | PropertyValueTag
        | RemarkTag
        | OntologyTag
        | OwlAxiomsTag
    )
    ~ (!":" ~ OboChar)+
}
// 3.3 Term Frames
// A `[Term]` frame: the stanza header line, the mandatory `id:` line, then
// any mix of clause lines and blank lines.
TermFrame = {
    "[Term]" ~ nl ~
    "id:" ~ ClassId ~ EOL ~
    (TermClauseLine | nl)*
}
// A term clause together with its end-of-line material.
TermClauseLine = { TermClause ~ EOL }
// One clause of a `[Term]` frame: a tag followed by the value(s) it expects.
// Alternatives are tried top to bottom.
TermClause = {
      IsAnonymousTag    ~ Boolean
    | NameTag           ~ UnquotedString
    | NamespaceTag      ~ NamespaceId
    | AltIdTag          ~ Id
    | DefTag            ~ QuotedString ~ XrefList
    | CommentTag        ~ UnquotedString
    | SubsetTag         ~ SubsetId
    | SynonymTag        ~ Synonym
    | XrefTag           ~ Xref
    | BuiltinTag        ~ Boolean
    | PropertyValueTag  ~ PropertyValue
    | IsATag            ~ ClassId
    // The two-identifier form (relation + class) is tried before the
    // single-class form.
    | IntersectionOfTag ~ ((RelationId ~ ClassId) | ClassId)
    | UnionOfTag        ~ ClassId
    | EquivalentToTag   ~ ClassId
    | DisjointFromTag   ~ ClassId
    | RelationshipTag   ~ RelationId ~ ClassId
    | IsObsoleteTag     ~ Boolean
    | ReplacedByTag     ~ ClassId
    | ConsiderTag       ~ ClassId
    | CreatedByTag      ~ UnquotedString
    | CreationDateTag   ~ Iso8601DateTime
}
// 3.4 Typedef Frames
// A `[Typedef]` frame: the stanza header line, the mandatory `id:` line, then
// any mix of clause lines and blank lines.
// The frame identifier is a `RelationId`: a typedef declares a relation, and
// the clauses of this frame refer to relations through `RelationId` as well.
TypedefFrame = {
    "[Typedef]" ~ nl
    ~ "id:" ~ RelationId ~ EOL
    ~ (TypedefClauseLine | nl)*
}
// A typedef clause together with its end-of-line material.
TypedefClauseLine = { TypedefClause ~ EOL }
// One clause of a `[Typedef]` frame: a tag followed by the value(s) it
// expects. Alternatives are tried top to bottom.
TypedefClause = {
      IsAnonymousTag         ~ Boolean
    | NameTag                ~ UnquotedString
    | NamespaceTag           ~ NamespaceId
    | AltIdTag               ~ Id
    | DefTag                 ~ QuotedString ~ XrefList
    | CommentTag             ~ UnquotedString
    | SubsetTag              ~ SubsetId
    | SynonymTag             ~ Synonym
    | XrefTag                ~ Xref
    | PropertyValueTag       ~ PropertyValue
    | DomainTag              ~ ClassId
    | RangeTag               ~ ClassId
    | BuiltinTag             ~ Boolean
    | HoldsOverChainTag      ~ RelationId ~ RelationId
    | IsAntiSymmetricTag     ~ Boolean
    | IsCyclicTag            ~ Boolean
    | IsReflexiveTag         ~ Boolean
    | IsSymmetricTag         ~ Boolean
    | IsAsymmetricTag        ~ Boolean
    | IsTransitiveTag        ~ Boolean
    | IsFunctionalTag        ~ Boolean
    | IsInverseFunctionalTag ~ Boolean
    | IsATag                 ~ RelationId
    | IntersectionOfTag      ~ RelationId
    | UnionOfTag             ~ RelationId
    | EquivalentToTag        ~ RelationId
    | DisjointFromTag        ~ RelationId
    | InverseOfTag           ~ RelationId
    | TransitiveOverTag      ~ RelationId
    | EquivalentToChainTag   ~ RelationId ~ RelationId
    | DisjointOverTag        ~ RelationId
    | RelationshipTag        ~ RelationId ~ RelationId
    | IsObsoleteTag          ~ Boolean
    | ReplacedByTag          ~ RelationId
    | ConsiderTag            ~ Id
    | CreatedByTag           ~ UnquotedString
    | CreationDateTag        ~ Iso8601DateTime
    | ExpandAssertionToTag   ~ QuotedString ~ XrefList
    | ExpandExpressionToTag  ~ QuotedString ~ XrefList
    | IsMetadataTagTag       ~ Boolean
    | IsClassLevelTag        ~ Boolean
}
// 3.5 Instance Frames
// An `[Instance]` frame: the stanza header line, the mandatory `id:` line,
// then any mix of clause lines and blank lines.
InstanceFrame = {
    "[Instance]" ~ nl ~
    "id:" ~ InstanceId ~ EOL ~
    (InstanceClauseLine | nl)*
}
// An instance clause together with its end-of-line material.
InstanceClauseLine = { InstanceClause ~ EOL }
// One clause of an `[Instance]` frame: a tag followed by the value(s) it
// expects. Alternatives are tried top to bottom.
InstanceClause = {
      IsAnonymousTag   ~ Boolean
    | NameTag          ~ UnquotedString
    | NamespaceTag     ~ NamespaceId
    | AltIdTag         ~ Id
    | DefTag           ~ QuotedString ~ XrefList
    | CommentTag       ~ UnquotedString
    | SubsetTag        ~ SubsetId
    | SynonymTag       ~ Synonym
    | XrefTag          ~ Xref
    | PropertyValueTag ~ PropertyValue
    | InstanceOfTag    ~ ClassId
    | RelationshipTag  ~ RelationId ~ InstanceId
    | CreatedByTag     ~ UnquotedString
    | CreationDateTag  ~ Iso8601DateTime
    | IsObsoleteTag    ~ Boolean
    | ReplacedByTag    ~ InstanceId
    | ConsiderTag      ~ Id
}
// 3.6 Synonym scope
// The four synonym scope keywords.
SynonymScope       = @{ "EXACT" | "BROAD" | "NARROW" | "RELATED" }
// Same keywords, but only when blanks follow (checked by lookahead, not
// consumed).
SynonymScopeSingle = @{ ("EXACT" | "BROAD" | "NARROW" | "RELATED") ~ &ws }
// `synonym:` value: quoted text, scope, optional synonym type, xref list.
Synonym            = { QuotedString ~ SynonymScopeSingle ~ SynonymTypeId? ~ XrefList }
// 4.0 Misc
// Target of an `import:` header clause: an IRI is tried first, then a plain
// identifier.
Import = ${ Iri | Id }
// WORKAROUND(@althonos): the 1.4 spec requires all property values to be
// quote-enclosed, but this is not done currently by the
// owlapi and owl2obo converters. As a workaround we can
// accept unquoted string without whitespaces as well as
// quoted strings for now.
// `property_value:` payload: a relation followed by either a literal value
// with its XSD datatype, or an identifier. The literal form is tried first.
PropertyValue = {
    RelationId ~ (
          ((QuotedString | PvValue) ~ XsdDatatype)
        | Id
    )
}
// Unquoted literal value: a run of non-blank characters.
PvValue       = @{ NonWsChar+ }
// `xsd:` followed by one or more ASCII letters or digits.
XsdDatatype   = @{ "xsd:" ~ ASCII_ALPHANUMERIC+ }
// Annex I: Iri Grammar from [RFC3987]
//
// NB(@althonos): Since the `iri_string` crate is then used to parse the IRI,
// there is no need for a proper tokenization of the IRI components.
//
// Ideally, this would reside in another grammar file, but Pest
// 2.0 doesn't support sharing rules between files.
// A full IRI: scheme, ":", hierarchical part, then an optional "?query" and
// an optional "#fragment". The rule is atomic, so the component rules below
// only drive matching; the whole IRI is produced as a single token.
Iri = @{ IriScheme ~ ":" ~ IriHierPart ~ ("?" ~ IriQuery)? ~ ("#" ~ IriFragment)? }
// The part of an IRI between the scheme's ":" and the optional query or
// fragment (`ihier-part` in RFC 3987). Alternatives are tried in order:
//   1. "//", an authority, then an optional run of "/"-led segments;
//   2. a path starting with "/";
//   3. a path starting with a non-empty segment;
//   4. the empty path.
// `IriPathAbempty` on its own requires at least one "/" segment, so it is
// made optional here: an authority with nothing after it (for example
// `http://example.org`) is a valid IRI, as RFC 3987 defines `ipath-abempty`
// as `*( "/" isegment )`.
IriHierPart = {
      ("//" ~ IriAuthority ~ IriPathAbempty?)
    | IriPathAbsolute
    | IriPathRootless
    | IriPathEmpty
}
// Authority component: optional `userinfo@`, the host, optional `:port`.
IriAuthority = ${
    (IriUserInfo ~ "@")?
    ~ IriHost
    ~ (":" ~ IriPort)?
}
// User information: any run (possibly empty) of unreserved characters,
// percent-escapes, sub-delimiters and ":".
IriUserInfo  = ${ (IriUnreserved | IriPctEncoded | IriSubDelims | ":")* }
// Host component: a bracketed IP literal, a dotted-quad IPv4 address, or a
// registered name, tried in that order.
// The IPv4 alternative is only accepted when the address is not directly
// followed by another character a registered name may contain. PEG choice
// commits to the first alternative that matches, so without this guard a
// name such as `1.2.3.4.example.org` would be cut after `1.2.3.4` and the
// rest of the IRI would then fail to parse.
IriHost = ${
      IriIpLiteral
    | (IriIpv4Address ~ !(IriUnreserved | IriPctEncoded | IriSubDelims))
    | IriRegName
}
// Registered name: any run (possibly empty) of unreserved characters,
// percent-escapes and sub-delimiters.
IriRegName = ${ (IriUnreserved | IriPctEncoded | IriSubDelims)* }
// Any of the five path shapes, tried in the order listed.
IriPath = ${
      IriPathAbempty
    | IriPathAbsolute
    | IriPathNoScheme
    | IriPathRootless
    | IriPathEmpty
}
// One or more "/"-led segments (each segment may itself be empty).
// NOTE(review): RFC 3987 defines `ipath-abempty` as `*( "/" isegment )`, which
// also admits zero segments; this rule requires at least one.
IriPathAbempty  = ${ ("/" ~ IriSegment)+ }
// "/" optionally followed by a non-empty segment and further "/"-led segments.
IriPathAbsolute = ${ "/" ~ (IriSegmentNz ~ ("/" ~ IriSegment)*)? }
// A non-empty, colon-free first segment, then further "/"-led segments.
IriPathNoScheme = ${ IriSegmentNzNc ~ ("/" ~ IriSegment)* }
// A non-empty first segment, then further "/"-led segments.
IriPathRootless = ${ IriSegmentNz ~ ("/" ~ IriSegment)* }
// The empty path. RFC 3987 writes it `ipath-empty = 0<ipchar>`, meaning
// "zero `ipchar`s", so the rule matches the empty string and consumes
// nothing. (Requiring a literal "0" followed by an `IriIpChar` would make an
// IRI with an empty path, such as `scheme:` or `scheme:?query`, unparseable.)
IriPathEmpty = ${ "" }
// Path segments: possibly empty, non-empty, and non-empty without ":".
IriSegment     = @{ IriIpChar* }
IriSegmentNz   = @{ IriIpChar+ }
IriSegmentNzNc = @{ (IriUnreserved | IriPctEncoded | IriSubDelims | "@")+ }
// Query and fragment bodies (both may be empty); only the query admits
// private-use characters.
IriQuery       = @{ (IriIpChar | IriPrivate | "/" | "?")* }
IriFragment    = @{ (IriIpChar | "/" | "?")* }
// Scheme: a letter followed by letters, digits, "+", "-" or ".".
IriScheme      = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "+" | "-" | ".")* }
// Port: any number of digits, including none.
IriPort        = @{ ASCII_DIGIT* }
// Private-use code point ranges.
IriPrivate     = ${ '\u{E000}'..'\u{F8FF}' | '\u{F0000}'..'\u{FFFFD}' | '\u{100000}'..'\u{10FFFD}' }
// "%" followed by exactly two hexadecimal digits.
IriPctEncoded  = ${ "%" ~ ASCII_HEX_DIGIT ~ ASCII_HEX_DIGIT }
// NOTE(review): RFC 3987 `iunreserved` also includes the non-ASCII `ucschar`
// ranges, which are not matched here; confirm this is intended.
IriUnreserved  = @{ ASCII_ALPHA | ASCII_DIGIT | "-" | "." | "_" | "~" }
IriReserved    = @{ IriGenDelims | IriSubDelims }
IriGenDelims   = @{ ":" | "/" | "?" | "#" | "[" | "]" | "@" }
IriSubDelims   = @{ "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=" }
// One decimal octet of an IPv4 address, i.e. a number from 0 to 255 without
// a leading zero.
// The alternatives are listed from longest to shortest on purpose: PEG
// choice commits to the first alternative that matches, so trying the
// single-digit form first would make "192" match only "1" and every address
// with a multi-digit octet would fail at the following ".".
IriDecOctet = ${
      ("25" ~ '0'..'5')                   // 250-255
    | ("2" ~ '0'..'4' ~ ASCII_DIGIT)      // 200-249
    | ("1" ~ ASCII_DIGIT ~ ASCII_DIGIT)   // 100-199
    | ('1'..'9' ~ ASCII_DIGIT)            // 10-99
    | ASCII_DIGIT                         // 0-9
}
// One path character: an unreserved character, a percent-escape, a
// sub-delimiter, ":" or "@".
IriIpChar = { IriUnreserved | IriPctEncoded | IriSubDelims | ":" | "@" }
// A bracketed IP literal: "[", then exactly one IPv6 address or one
// IPvFuture address, then "]" (RFC 3987:
// `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`).
// The two address forms are alternatives, not a sequence: requiring an IPv6
// address immediately followed by an IPvFuture address would reject every
// real literal such as `[::1]`.
IriIpLiteral = { "[" ~ (IriIpv6Address | IriIpvFutureAddress) ~ "]" }
// One 16-bit IPv6 group: one to four hexadecimal digits.
IriIpv6H16          = ${ ASCII_HEX_DIGIT{1,4} }
// The last 32 bits of an IPv6 address: two groups joined by ":", or an
// embedded IPv4 address (tried second).
IriIpv6Ls32         = {
      (IriIpv6H16 ~ ":" ~ IriIpv6H16)
    | IriIpv4Address
}
// Dotted quad: four decimal octets separated by ".".
IriIpv4Address      = ${ IriDecOctet ~ "." ~ IriDecOctet ~ "." ~ IriDecOctet ~ "." ~ IriDecOctet }
// "v", hexadecimal version digits, ".", then one or more unreserved
// characters, sub-delimiters or ":".
IriIpvFutureAddress = ${ "v" ~ ASCII_HEX_DIGIT+ ~ "." ~ (IriUnreserved | IriSubDelims | ":")+ }
// An IPv6 address, written as nine alternatives that differ in where the
// "::" abbreviation sits: the first has no "::" and needs six `h16:` groups
// before the final 32 bits; each following alternative moves groups from
// after the "::" to an optional prefix before it.
//
// NOTE(review): the optional prefixes use an exact repetition count
// (`(IriIpv6H16 ~ ":"){n}`), whereas the corresponding RFC 3986/3987
// production uses `*n( h16 ":" )`, i.e. "at most n". As written, each prefix
// only accepts its longest form (for instance the 4th alternative accepts
// `a:b::…` but not `a::…`). Confirm whether shorter prefixes should match;
// a faithful PEG translation also has to avoid the repetition greedily
// consuming the group that precedes "::".
IriIpv6Address = ${
( (IriIpv6H16 ~ ":"){6} ~ IriIpv6Ls32 )
| ( "::" ~ (IriIpv6H16 ~ ":"){5} ~ IriIpv6Ls32 )
| ( IriIpv6H16? ~ "::" ~ (IriIpv6H16 ~ ":"){4} ~ IriIpv6Ls32 )
| ( ((IriIpv6H16 ~ ":"){1} ~ IriIpv6H16)? ~ "::" ~ (IriIpv6H16 ~ ":"){3} ~ IriIpv6Ls32)
| ( ((IriIpv6H16 ~ ":"){2} ~ IriIpv6H16)? ~ "::" ~ (IriIpv6H16 ~ ":"){2} ~ IriIpv6Ls32)
| ( ((IriIpv6H16 ~ ":"){3} ~ IriIpv6H16)? ~ "::" ~ IriIpv6H16 ~ ":" ~ IriIpv6Ls32)
| ( ((IriIpv6H16 ~ ":"){4} ~ IriIpv6H16)? ~ "::" ~ IriIpv6Ls32)
| ( ((IriIpv6H16 ~ ":"){5} ~ IriIpv6H16)? ~ "::" ~ IriIpv6H16)
| ( ((IriIpv6H16 ~ ":"){6} ~ IriIpv6H16)? ~ "::")
}
// Annex II: ISO-8601 Grammar for DateTime w/ Timezone
// Date, a literal "T", time, and an optional timezone designator.
Iso8601DateTime = ${ Iso8601Date ~ "T" ~ Iso8601Time ~ Iso8601TimeZone? }
// Year, month, day; each separator is optional.
Iso8601Date     = ${ Iso8601Year ~ Iso8601DateSep? ~ Iso8601Month ~ Iso8601DateSep? ~ Iso8601Day }
// Date separator: ASCII hyphen or en dash.
Iso8601DateSep  = _{ "-" | "–" }
Iso8601Year     = @{ ASCII_DIGIT{4} }
Iso8601Month    = @{ ASCII_DIGIT{1,2} }
Iso8601Day      = @{ ASCII_DIGIT{1,2} }
// Hour, minute, second, optional fraction; each ":" separator is optional.
Iso8601Time     = ${ Iso8601Hour ~ Iso8601TimeSep? ~ Iso8601Minute ~ Iso8601TimeSep? ~ Iso8601Second ~ Iso8601Fraction? }
Iso8601TimeSep  = _{ ":" }
Iso8601DecSep   = _{ "." | "," }
// 00-19 or 20-24.
Iso8601Hour     = @{ ('0'..'1' ~ '0'..'9') | ("2" ~ '0'..'4') }
// 00-59.
Iso8601Minute   = @{ '0'..'5' ~ '0'..'9' }
// 00-59, or 60.
Iso8601Second   = @{ ('0'..'5' ~ '0'..'9') | "60" }
// Decimal separator followed by one or more digits.
Iso8601Fraction = @{ Iso8601DecSep ~ '0'..'9'+ }
// "Z", or a signed hour offset with optional minutes (the ":" is optional).
Iso8601TimeZone = ${ "Z" | (("+" | "-") ~ Iso8601Hour ~ (":"? ~ Iso8601Minute)?) }