//! A PEG copy of the OBO format 1.4 syntax.
//!
//! # See also
//!
//! - [OBO Flat File Format 1.4 syntax](http://purl.obolibrary.org/obo/oboformat/spec.html)
//! - [IRI syntax (IETF RFC 3987)](https://tools.ietf.org/html/rfc3987#section-2.2)
// Implicit whitespace for pest: in non-atomic rules, `WHITESPACE*` is
// inserted between the elements of every sequence and repetition. Only
// `WhitespaceChar` is skipped this way, so line terminators stay significant.
WHITESPACE = _{ WhitespaceChar }
// 2.1 BNF Notation
// Boolean literals. Each value has its own atomic rule so that the pair
// produced under `Boolean` tells which one was matched.
BooleanTrue  = @{ "true" }
BooleanFalse = @{ "false" }
Boolean      =  { BooleanTrue | BooleanFalse }
// Clause tags. Every tag is an atomic literal that includes its trailing
// colon, so a tag never matches a longer tag that merely shares a prefix
// (e.g. `is_a:` does not match the start of `is_anonymous:`).
AltIdTag                               = @{ "alt_id:" }
AutoGeneratedByTag                     = @{ "auto-generated-by:" }
BuiltinTag                             = @{ "builtin:" }
CommentTag                             = @{ "comment:" }
ConsiderTag                            = @{ "consider:" }
CreatedByTag                           = @{ "created_by:" }
CreationDateTag                        = @{ "creation_date:" }
DataVersionTag                         = @{ "data-version:" }
DateTag                                = @{ "date:" }
DisjointFromTag                        = @{ "disjoint_from:" }
DisjointOverTag                        = @{ "disjoint_over:" }
DefTag                                 = @{ "def:" }
DefaultNamespaceTag                    = @{ "default-namespace:" }
DomainTag                              = @{ "domain:" }
EquivalentToTag                        = @{ "equivalent_to:" }
EquivalentToChainTag                   = @{ "equivalent_to_chain:" }
ExpandAssertionToTag                   = @{ "expand_assertion_to:" }
ExpandExpressionToTag                  = @{ "expand_expression_to:" }
FormatVersionTag                       = @{ "format-version:" }
HoldsOverChainTag                      = @{ "holds_over_chain:" }
IdspaceTag                             = @{ "idspace:" }
ImportTag                              = @{ "import:" }
InstanceOfTag                          = @{ "instance_of:" }
IntersectionOfTag                      = @{ "intersection_of:" }
InverseOfTag                           = @{ "inverse_of:" }
IsATag                                 = @{ "is_a:" }
IsAnonymousTag                         = @{ "is_anonymous:" }
IsAntiSymmetricTag                     = @{ "is_anti_symmetric:" }
IsAsymmetricTag                        = @{ "is_asymmetric:" }
IsClassLevelTag                        = @{ "is_class_level:" }
IsCyclicTag                            = @{ "is_cyclic:" }
IsFunctionalTag                        = @{ "is_functional:" }
IsInverseFunctionalTag                 = @{ "is_inverse_functional:" }
IsMetadataTagTag                       = @{ "is_metadata_tag:" }
IsObsoleteTag                          = @{ "is_obsolete:" }
IsReflexiveTag                         = @{ "is_reflexive:" }
IsSymmetricTag                         = @{ "is_symmetric:" }
IsTransitiveTag                        = @{ "is_transitive:" }
NameTag                                = @{ "name:" }
NamespaceTag                           = @{ "namespace:" }
NamespaceIdRuleTag                     = @{ "namespace-id-rule:" }
OntologyTag                            = @{ "ontology:" }
OwlAxiomsTag                           = @{ "owl-axioms:" }
PropertyValueTag                       = @{ "property_value:" }
RelationshipTag                        = @{ "relationship:" }
RangeTag                               = @{ "range:" }
RemarkTag                              = @{ "remark:" }
ReplacedByTag                          = @{ "replaced_by:" }
SavedByTag                             = @{ "saved-by:" }
SubsetTag                              = @{ "subset:" }
SubsetdefTag                           = @{ "subsetdef:" }
SynonymTypedefTag                      = @{ "synonymtypedef:" }
SynonymTag                             = @{ "synonym:" }
TransitiveOverTag                      = @{ "transitive_over:" }
TreatXrefsAsEquivalentTag              = @{ "treat-xrefs-as-equivalent:" }
TreatXrefsAsGenusDifferentiaTag        = @{ "treat-xrefs-as-genus-differentia:" }
TreatXrefsAsHasSubclassTag             = @{ "treat-xrefs-as-has-subclass:" }
TreatXrefsAsIsATag                     = @{ "treat-xrefs-as-is_a:" }
TreatXrefsAsReverseGenusDifferentiaTag = @{ "treat-xrefs-as-reverse-genus-differentia:" }
TreatXrefsAsRelationshipTag            = @{ "treat-xrefs-as-relationship:" }
UnionOfTag                             = @{ "union_of:" }
XrefTag                                = @{ "xref:" }
// 2.2 Characters
// 2.2.0 Basic Characters
// Atomic wrappers around pest's built-in ASCII letter and ASCII digit rules.
AlphaChar = @{ ASCII_ALPHA }
Digit = @{ ASCII_DIGIT }
// 2.2.1 Spacing Characters
// Horizontal whitespace: a space (U+0020) or a horizontal tab (U+0009).
// The former third alternative "\u{0020}" was the same character as " " and
// could never be reached, so it has been dropped.
WhitespaceChar = _{ " " | "\t" }
// A line terminator; CRLF is tried first so it is consumed as one unit.
NewlineChar = _{ "\r\n" | "\n" }
// One or more whitespace characters.
ws = _{ WhitespaceChar+ }
// Optional trailing whitespace followed by a line terminator.
nl = _{ WhitespaceChar* ~ NewlineChar }
// 2.2.2 Special Characters
// Any single character (pest's built-in `ANY`).
UniCodeChar = @{ ANY }
// A character allowed in unquoted values: either a backslash escape (a
// backslash followed by any character), or any character other than a
// backslash, a line terminator, `!` (which starts a comment) or `{` (which
// starts a qualifier list).
OboChar = @{ ("\\" ~ UniCodeChar) | ( !("\\" | NewlineChar | "!" | "{") ~ UniCodeChar) }
// An `OboChar` that does not start with a space or a tab.
NonWsChar = @{ !(WhitespaceChar) ~ OboChar }
// 2.3 Line Termination
// End of a clause line: an optional qualifier list, an optional comment,
// then the line terminator.
EOL = { QualifierList? ~ Comment? ~ nl }
// A `!` comment running up to (but not including) the line terminator.
// Produces a `Comment` pair that wraps a `CommentText` pair.
Comment = { CommentPrefix ~ CommentText }
// Optional leading whitespace followed by the `!` marker.
CommentPrefix = _{ WhitespaceChar* ~ "!" }
CommentText = ${ ( !NewlineChar ~ UniCodeChar )* }
// Matches a comment without producing any pair. A silent rule only hides
// its OWN pair, so delegating to `Comment` would still leak `Comment` pairs
// into every frame using this rule; the comment body is matched inline with
// the built-in `ANY` instead. Every use site is followed by `nl` or by
// implicit whitespace, which absorbs any trailing blanks left unconsumed.
CommentSilent = _{ CommentPrefix ~ ( !NewlineChar ~ ANY )* }
// A character allowed in a qualifier key: any `NonWsChar` that does not
// start with `=`, `,`, `}`, `{` or `"`.
QualifierChar = @{ !("=" | "," | "}" | "{" | "\"") ~ NonWsChar }
// A qualifier key: one or more qualifier characters.
QualifierId = @{ QualifierChar+ }
// A `key="value"` pair. Compound-atomic: no whitespace is skipped around `=`.
Qualifier = ${ QualifierId ~ "=" ~ QuotedString }
// A brace-enclosed, comma-separated list holding at least one qualifier.
QualifierList = { "{" ~ Qualifier ~ ("," ~ Qualifier)* ~ "}" }
// 2.4 Clause Values
// A double-quoted string. Backslash escapes are consumed as a unit
// (backslash plus the following character), so that an escaped backslash
// right before the closing quote (`"a\\"`) is not misread as a backslash
// followed by an escaped quote. Any other character except an unescaped `"`
// is accepted as-is.
QuotedString = @{ "\"" ~ ( ("\\" ~ ANY) | (!"\"" ~ ANY) )* ~ "\"" }
// One or more `OboChar`s; being atomic, it also spans inner and trailing
// spaces and stops at a line terminator, an unescaped `!` or an unescaped `{`.
UnquotedString = @{ OboChar+ }
// 2.5 Identifiers
// NB(@althonos): PEG ordered choice commits to the first alternative that
//                succeeds, even when it only consumes a prefix of the token,
//                so we sometimes use positive predicates to make a rule fail
//                unless it covers the whole token.
//
//                For instance, '00-01' parsed by the `IdLocal` rule could
//                otherwise match the `CanonicalIdLocal` rule with `-01` as
//                remaining input, but we actually want it parsed as a
//                `NonCanonicalIdLocal` without remaining input.
// Typed wrappers: each one wraps a plain `Id` under a distinct rule name.
ClassId = { Id }
RelationId = { Id }
InstanceId = { Id }
SynonymTypeId = { Id }
NamespaceId = { Id }
SubsetId = { Id }
// `RFC3987_*` rules are not defined in this section of the grammar.
Iri = { RFC3987_Iri }
// An identifier; alternatives are tried in order: URL, prefixed, unprefixed.
Id = ${ UrlId | PrefixedId | UnprefixedId }
UrlId = @{ RFC3987_IriScheme ~ "://" ~ RFC3987_IriAuthority ~ RFC3987_IriPathAbempty ~ ("?" ~ RFC3987_IriQuery)? ~ ("#" ~ RFC3987_IriFragment)? }
// One or more non-whitespace characters, none of which starts with `:`.
UnprefixedId = @{ ( !":" ~ NonWsChar )+ }
PrefixedId = ${ IdPrefix ~ ":" ~ IdLocal }
IdPrefix = ${ (CanonicalIdPrefix | NonCanonicalIdPrefix) }
// Letters and underscores starting with a letter; the lookahead requires the
// prefix to end right before `:` or at the end of input.
CanonicalIdPrefix = @{ AlphaChar ~ (AlphaChar | "_")* ~ &(":" | EOI) }
// NOTE(review): `*` lets this rule match the empty string, so `:local` is
// accepted as a prefixed identifier with an empty prefix; confirm that this
// is intended.
NonCanonicalIdPrefix = @{ (!":" ~ NonWsChar)* }
IdLocal = ${ (CanonicalIdLocal | NonCanonicalIdLocal) }
// Digits only; the lookahead requires the digits to be followed by the end
// of input, a whitespace character or a line terminator.
// NOTE(review): digits directly followed by `!` or `{` fail this lookahead
// and fall through to `NonCanonicalIdLocal`; confirm that this is intended.
CanonicalIdLocal = @{ ASCII_DIGIT+ ~ &(EOI | WhitespaceChar | NewlineChar) }
// NOTE(review): `*` lets this rule match the empty string, so `prefix:` is
// accepted with an empty local part; confirm that this is intended.
NonCanonicalIdLocal = @{ NonWsChar* }
// 2.6 Xref Lists
// A single cross-reference as used by `xref:` clauses: an identifier
// followed by an optional quoted string.
Xref = { Id ~ QuotedString? }
// A character allowed in a list item identifier: any `NonWsChar` that does
// not start with `,` or `]`, which delimit the items of an `XrefList`.
XrefChar = ${ !"," ~ !"]" ~ NonWsChar }
XrefId = @{ XrefChar+ }
// One entry of a bracketed list: an identifier and an optional quoted string.
XrefListItem = { XrefId ~ QuotedString? }
// A bracket-enclosed, comma-separated list of zero or more items. The first
// item and the comma-separated tail are grouped together, so that a list
// starting with a comma (`[, a]`) is rejected just like a trailing comma is.
XrefList = { "[" ~ (XrefListItem ~ ("," ~ XrefListItem)*)? ~ "]" }
// 3 Obo Grammar
// 3.1 Obo Document Structure
// A whole document: one header frame, then any number of entity frames, up
// to the end of input.
OboDoc = { HeaderFrame ~ EntityFrame* ~ EOI }
// Any of the three frame kinds, tried in this order.
EntityFrame = { TermFrame | InstanceFrame | TypedefFrame }
// NB(@althonos): for iterative parsers.
EntitySingle = _{ EntityFrame ~ EOI }
// 3.2 Obo Headers
// The header frame, made of three parts in order:
//   1. any number of newline-terminated lines, each holding a header clause,
//      a comment, or nothing;
//   2. an optional last header clause without a line terminator;
//   3. any number of line terminators, each optionally followed by a comment.
// Header lines end with `nl` rather than `EOL`, so a header clause cannot be
// followed by a qualifier list or a comment on the same line.
HeaderFrame = { ((HeaderClause | CommentSilent)? ~ nl)* ~ HeaderClause? ~ (nl ~ CommentSilent?)* }
// Header date in `dd:MM:yyyy HH:mm` layout. The date and the time parts are
// separated by implicit (hence optional) whitespace.
NaiveDateTime = { NaiveDate ~ NaiveTime }
NaiveDate     = ${ NaiveDay ~ ":" ~ NaiveMonth ~ ":" ~ NaiveYear }
NaiveTime     = ${ NaiveHour ~ ":" ~ NaiveMinute }
// 01-09, 10-29, 30-31 (not checked against the month).
NaiveDay      = @{ ("0" ~ '1'..'9') | ('1'..'2' ~ ASCII_DIGIT) | ("3" ~ '0'..'1') }
// 01-09, 10-12.
NaiveMonth    = @{ ("0" ~ '1'..'9') | ("1" ~ '0'..'2') }
// Exactly four digits.
NaiveYear     = @{ ASCII_DIGIT{4} }
// 00-19, 20-23.
NaiveHour     = @{ ('0'..'1' ~ ASCII_DIGIT) | ("2" ~ '0'..'3') }
// 00-59.
NaiveMinute   = @{ '0'..'5' ~ ASCII_DIGIT }
// A single header clause: optional leading whitespace, then a tag followed
// by the value(s) expected for that tag. Alternatives are tried in order;
// the last one accepts any tag that is not listed in `Reserved`, followed by
// an unquoted value.
HeaderClause = { WhitespaceChar* ~ (
FormatVersionTag ~ UnquotedString
| DataVersionTag ~ UnquotedString
| DateTag ~ NaiveDateTime
| SavedByTag ~ UnquotedString
| AutoGeneratedByTag ~ UnquotedString
| ImportTag ~ Import
| SubsetdefTag ~ SubsetId ~ QuotedString
| SynonymTypedefTag ~ SynonymTypeId ~ QuotedString ~ SynonymScope?
| DefaultNamespaceTag ~ NamespaceId
| IdspaceTag ~ IdPrefix ~ Iri ~ QuotedString?
| NamespaceIdRuleTag ~ UnquotedString
| TreatXrefsAsEquivalentTag ~ IdPrefix
| TreatXrefsAsGenusDifferentiaTag ~ IdPrefix ~ RelationId ~ ClassId
| TreatXrefsAsReverseGenusDifferentiaTag ~ IdPrefix ~ RelationId ~ ClassId
| TreatXrefsAsRelationshipTag ~ IdPrefix ~ RelationId
| TreatXrefsAsIsATag ~ IdPrefix
| TreatXrefsAsHasSubclassTag ~ IdPrefix
// FIXME(@althonos): allow EOL
| PropertyValueTag ~ PropertyValue
| RemarkTag ~ UnquotedString
| OntologyTag ~ UnquotedString
| OwlAxiomsTag ~ UnquotedString
| Unreserved ~ ":" ~ UnquotedString
)}
// The tags that `HeaderClause` handles with a dedicated alternative. This
// list has to be kept in sync with `HeaderClause`, since `Unreserved` relies
// on it to reject those tags.
Reserved = {
FormatVersionTag
| DataVersionTag
| DateTag
| SavedByTag
| AutoGeneratedByTag
| ImportTag
| SubsetdefTag
| SynonymTypedefTag
| DefaultNamespaceTag
| IdspaceTag
| NamespaceIdRuleTag
| TreatXrefsAsEquivalentTag
| TreatXrefsAsGenusDifferentiaTag
| TreatXrefsAsReverseGenusDifferentiaTag
| TreatXrefsAsRelationshipTag
| TreatXrefsAsIsATag
| TreatXrefsAsHasSubclassTag
| PropertyValueTag
| RemarkTag
| OntologyTag
| OwlAxiomsTag
}
// A tag name that is not reserved: one or more `OboChar`s up to (excluding)
// the first `:`, provided the input does not start with a reserved tag.
Unreserved = @{ !Reserved ~ (!":" ~ OboChar)+ }
// 3.3 Term Frames
// A `[Term]` frame, in order: blank or comment-only lines, the `[Term]`
// header line, more blank or comment-only lines, the mandatory `id:` clause,
// then any number of clause lines or blank/comment-only lines.
TermFrame = {
(CommentSilent? ~ nl)*
~ WhitespaceChar* ~ "[Term]" ~ nl
~ (CommentSilent? ~ nl)*
~ WhitespaceChar* ~ "id:" ~ ClassId ~ EOL
~ (TermClauseLine | CommentSilent? ~ nl)*
}
// A term clause together with its end of line (qualifiers, comment, newline).
TermClauseLine = {
TermClause ~ EOL
}
// A single clause of a `[Term]` frame: optional leading whitespace, then a
// tag followed by the value(s) expected for that tag. Alternatives are tried
// in order.
TermClause = { WhitespaceChar* ~ (
IsAnonymousTag ~ Boolean
| NameTag ~ UnquotedString
| NamespaceTag ~ NamespaceId
| AltIdTag ~ Id
| DefTag ~ Definition
| CommentTag ~ UnquotedString
| SubsetTag ~ SubsetId
| SynonymTag ~ Synonym
| XrefTag ~ Xref
| BuiltinTag ~ Boolean
| PropertyValueTag ~ PropertyValue
| IsATag ~ ClassId
// Two-identifier form (relation then class) is tried before the single-class form.
| IntersectionOfTag ~ ((RelationId ~ ClassId) | ClassId)
| UnionOfTag ~ ClassId
| EquivalentToTag ~ ClassId
| DisjointFromTag ~ ClassId
| RelationshipTag ~ RelationId ~ ClassId
| IsObsoleteTag ~ Boolean
| ReplacedByTag ~ ClassId
| ConsiderTag ~ ClassId
| CreatedByTag ~ UnquotedString
| CreationDateTag ~ CreationDate
)}
// 3.4 Typedef Frames
// A `[Typedef]` frame, in order: blank or comment-only lines, the
// `[Typedef]` header line, more blank or comment-only lines, the mandatory
// `id:` clause, then any number of clause lines or blank/comment-only lines.
// NOTE(review): the `id:` value is parsed as a `ClassId`, whereas the other
// typedef clauses refer to typedefs through `RelationId`; confirm whether
// consumers rely on the `ClassId` pair before changing it.
TypedefFrame = {
(CommentSilent? ~ nl)*
~ WhitespaceChar* ~ "[Typedef]" ~ nl
~ (CommentSilent? ~ nl)*
~ WhitespaceChar* ~ "id:" ~ ClassId ~ EOL
~ (TypedefClauseLine | CommentSilent? ~ nl)*
}
// A typedef clause together with its end of line (qualifiers, comment, newline).
TypedefClauseLine = {
TypedefClause ~ EOL
}
// A single clause of a `[Typedef]` frame: optional leading whitespace, then
// a tag followed by the value(s) expected for that tag. Alternatives are
// tried in order.
TypedefClause = { WhitespaceChar* ~ (
IsAnonymousTag ~ Boolean
| NameTag ~ UnquotedString
| NamespaceTag ~ NamespaceId
| AltIdTag ~ Id
| DefTag ~ Definition
| CommentTag ~ UnquotedString
| SubsetTag ~ SubsetId
| SynonymTag ~ Synonym
| XrefTag ~ Xref
| PropertyValueTag ~ PropertyValue
| DomainTag ~ ClassId
| RangeTag ~ ClassId
| BuiltinTag ~ Boolean
| HoldsOverChainTag ~ RelationId ~ RelationId
| IsAntiSymmetricTag ~ Boolean
| IsCyclicTag ~ Boolean
| IsReflexiveTag ~ Boolean
| IsSymmetricTag ~ Boolean
| IsAsymmetricTag ~ Boolean
| IsTransitiveTag ~ Boolean
| IsFunctionalTag ~ Boolean
| IsInverseFunctionalTag ~ Boolean
| IsATag ~ RelationId
| IntersectionOfTag ~ RelationId
| UnionOfTag ~ RelationId
| EquivalentToTag ~ RelationId
| DisjointFromTag ~ RelationId
| InverseOfTag ~ RelationId
| TransitiveOverTag ~ RelationId
| EquivalentToChainTag ~ RelationId ~ RelationId
| DisjointOverTag ~ RelationId
| RelationshipTag ~ RelationId ~ RelationId
| IsObsoleteTag ~ Boolean
| ReplacedByTag ~ RelationId
// NOTE(review): `consider:` takes a bare `Id` here while `replaced_by:`
// takes a `RelationId` (and `TermClause` uses `ClassId` for `consider:`);
// confirm whether this difference is intended.
| ConsiderTag ~ Id
| CreatedByTag ~ UnquotedString
| CreationDateTag ~ CreationDate
| ExpandAssertionToTag ~ QuotedString ~ XrefList
| ExpandExpressionToTag ~ QuotedString ~ XrefList
| IsMetadataTagTag ~ Boolean
| IsClassLevelTag ~ Boolean
)}
// 3.5 Instance Frames
// An `[Instance]` frame, in order: blank or comment-only lines, the
// `[Instance]` header line, more blank or comment-only lines, the mandatory
// `id:` clause, then any number of clause lines or blank/comment-only lines.
InstanceFrame = {
(CommentSilent? ~ nl)*
~ WhitespaceChar* ~ "[Instance]" ~ nl
~ (CommentSilent? ~ nl)*
~ WhitespaceChar* ~"id:" ~ InstanceId ~ EOL
~ (InstanceClauseLine | CommentSilent? ~ nl)*
}
// An instance clause together with its end of line (qualifiers, comment,
// newline).
InstanceClauseLine = {
InstanceClause ~ EOL
}
// A single clause of an `[Instance]` frame: optional leading whitespace,
// then a tag followed by the value(s) expected for that tag. Alternatives
// are tried in order.
InstanceClause = { WhitespaceChar* ~ (
IsAnonymousTag ~ Boolean
| NameTag ~ UnquotedString
| NamespaceTag ~ NamespaceId
| AltIdTag ~ Id
| DefTag ~ Definition
| CommentTag ~ UnquotedString
| SubsetTag ~ SubsetId
| SynonymTag ~ Synonym
| XrefTag ~ Xref
| PropertyValueTag ~ PropertyValue
| InstanceOfTag ~ ClassId
| RelationshipTag ~ RelationId ~ InstanceId
| CreatedByTag ~ UnquotedString
| CreationDateTag ~ CreationDate
| IsObsoleteTag ~ Boolean
| ReplacedByTag ~ InstanceId
// NOTE(review): `consider:` takes a bare `Id` here while `replaced_by:`
// takes an `InstanceId`; confirm whether this difference is intended.
| ConsiderTag ~ Id
)}
// 3.6 Synonym scope
// The four synonym scope keywords.
ExactSynonymScope = { "EXACT" }
BroadSynonymScope = { "BROAD" }
NarrowSynonymScope = { "NARROW" }
RelatedSynonymScope = { "RELATED" }
// Any scope keyword. The rule is atomic, so the keyword rules above do not
// produce pairs of their own when reached through it; the matched text tells
// the scopes apart.
SynonymScope = @{ ExactSynonymScope | BroadSynonymScope | NarrowSynonymScope | RelatedSynonymScope }
// A scope keyword that must be followed by whitespace (checked with a
// lookahead, not consumed).
SynonymScopeSingle = @{ SynonymScope ~ &ws }
// Value of a `synonym:` clause: the quoted text, the scope, then either an
// xref list alone or a synonym type identifier followed by an xref list.
Synonym = { QuotedString ~ SynonymScopeSingle ~ (XrefList | (SynonymTypeId ~ XrefList)) }
// 4.0 Misc
// Value of an `import:` header clause: an IRI is tried first, then an `Id`.
Import = ${ Iri | Id }
// Value of a `def:` clause: the quoted text followed by an xref list.
Definition = { QuotedString ~ XrefList }
// WORKAROUND(@althonos): the 1.4 spec requires all property values to be
//                        quote-enclosed, but this is not currently done by
//                        the owlapi and owl2obo converters. As a workaround,
//                        we accept unquoted strings without whitespace as
//                        well as quoted strings for now.
UnquotedPropertyValueTarget = @{ NonWsChar+ }
// Value of a `property_value:` clause. The three-part literal form is tried
// first; if it fails, the two-part resource form is used.
PropertyValue = { LiteralPropertyValue | ResourcePropertyValue }
// A relation, a quoted or unquoted value, then an identifier (presumably the
// datatype of the value; confirm against the OBO specification).
LiteralPropertyValue = { RelationId ~ (QuotedString | UnquotedPropertyValueTarget) ~ Id }
// A relation followed by the identifier it points to.
ResourcePropertyValue = { RelationId ~ Id }
// WORKAROUND(@althonos): the 1.4 spec requires that creation dates are given
//                        as an ISO8601 DateTime, but the 1.4 guide is vague
//                        and there are some cases in the wild where the tag
//                        value only contains an ISO8601 Date. To accommodate
//                        this, we try to parse a DateTime first, and fall
//                        back to a Date if that fails.
// The `ISO8601_*` rules are not defined in this section of the grammar.
CreationDate = ${ ISO8601_DateTime | ISO8601_Date }