fastobo-syntax 0.8.1

PEG Syntax and pest parser for the OBO flat file format 1.4
Documentation
//! A PEG copy of the OBO format 1.4 syntax.
//!
//! # See also
//!
//! - [OBO Flat File Format 1.4 syntax](http://purl.obolibrary.org/obo/oboformat/spec.html)
//! - [IRI syntax (IETF RFC 3987)](https://tools.ietf.org/html/rfc3987#section-2.2)


// Implicit-whitespace hook: pest inserts `WHITESPACE*` between the operands
// of `~` and between repetitions in every non-atomic rule of this grammar.
WHITESPACE = _{
    WhitespaceChar
}


// 2.1 BNF Notation

// Boolean literals. Each literal has its own atomic rule so that the parse
// tree records which of the two alternatives of `Boolean` matched.
BooleanTrue = @{ "true" }
BooleanFalse = @{ "false" }
Boolean = {
    BooleanTrue
  | BooleanFalse
}

// Clause tags. Every tag is an atomic literal that includes its trailing
// colon, so no tag literal is a prefix of another one (for instance `is_a:`
// is not a prefix of `is_anonymous:`).
AltIdTag = @{ "alt_id:" }
AutoGeneratedByTag = @{ "auto-generated-by:" }
BuiltinTag = @{ "builtin:" }
CommentTag = @{ "comment:" }
ConsiderTag = @{ "consider:" }
CreatedByTag = @{ "created_by:" }
CreationDateTag = @{ "creation_date:" }
DataVersionTag = @{ "data-version:" }
DateTag = @{ "date:" }
DisjointFromTag = @{ "disjoint_from:" }
DisjointOverTag = @{ "disjoint_over:" }
DefTag = @{ "def:" }
DefaultNamespaceTag = @{ "default-namespace:" }
DomainTag = @{ "domain:" }
EquivalentToTag = @{ "equivalent_to:" }
EquivalentToChainTag = @{ "equivalent_to_chain:" }
ExpandAssertionToTag = @{ "expand_assertion_to:" }
ExpandExpressionToTag = @{ "expand_expression_to:" }
FormatVersionTag = @{ "format-version:" }
HoldsOverChainTag = @{ "holds_over_chain:" }
IdspaceTag = @{ "idspace:" }
ImportTag = @{ "import:" }
InstanceOfTag = @{ "instance_of:" }
IntersectionOfTag = @{ "intersection_of:" }
InverseOfTag = @{ "inverse_of:" }
IsATag = @{ "is_a:" }
IsAnonymousTag = @{ "is_anonymous:" }
IsAntiSymmetricTag = @{ "is_anti_symmetric:" }
IsAsymmetricTag = @{ "is_asymmetric:" }
IsClassLevelTag = @{ "is_class_level:" }
IsCyclicTag = @{ "is_cyclic:" }
IsFunctionalTag = @{ "is_functional:" }
IsInverseFunctionalTag = @{ "is_inverse_functional:" }
IsMetadataTagTag = @{ "is_metadata_tag:" }
IsObsoleteTag = @{ "is_obsolete:" }
IsReflexiveTag = @{ "is_reflexive:" }
IsSymmetricTag = @{ "is_symmetric:" }
IsTransitiveTag = @{ "is_transitive:" }
NameTag = @{ "name:" }
NamespaceTag = @{ "namespace:" }
NamespaceIdRuleTag = @{ "namespace-id-rule:" }
OntologyTag = @{ "ontology:" }
OwlAxiomsTag = @{ "owl-axioms:" }
PropertyValueTag = @{ "property_value:" }
RelationshipTag = @{ "relationship:" }
RangeTag = @{ "range:" }
RemarkTag = @{ "remark:" }
ReplacedByTag = @{ "replaced_by:" }
SavedByTag = @{ "saved-by:" }
SubsetTag = @{ "subset:" }
SubsetdefTag = @{ "subsetdef:" }
SynonymTypedefTag = @{ "synonymtypedef:" }
SynonymTag = @{ "synonym:" }
TransitiveOverTag = @{ "transitive_over:" }
TreatXrefsAsEquivalentTag = @{ "treat-xrefs-as-equivalent:" }
TreatXrefsAsGenusDifferentiaTag = @{ "treat-xrefs-as-genus-differentia:" }
TreatXrefsAsHasSubclassTag = @{ "treat-xrefs-as-has-subclass:" }
TreatXrefsAsIsATag = @{ "treat-xrefs-as-is_a:" }
TreatXrefsAsReverseGenusDifferentiaTag = @{ "treat-xrefs-as-reverse-genus-differentia:" }
TreatXrefsAsRelationshipTag = @{ "treat-xrefs-as-relationship:" }
UnionOfTag = @{ "union_of:" }
XrefTag = @{ "xref:" }

// 2.2 Characters

// 2.2.0 Basic Characters

// Thin atomic aliases over pest's built-in ASCII character classes.
AlphaChar = @{
    ASCII_ALPHA
}
Digit = @{
    ASCII_DIGIT
}

// 2.2.1 Spacing Characters

// A single blank: space (U+0020) or horizontal tab.
WhitespaceChar = _{ "\t" | " " }
// A line break, either CRLF or bare LF.
NewlineChar = _{ "\r\n" | "\n" }
// One or more blanks.
ws = _{ WhitespaceChar+ }
// End of line, allowing trailing blanks before the line break.
nl = _{ WhitespaceChar* ~ NewlineChar }

// 2.2.2 Special Characters

// Any single character.
UniCodeChar = @{ ANY }

// One logical character of an unquoted value: either a backslash escape
// (backslash followed by any character), or any character other than a
// backslash, a line break, `!` (comment start) or `{` (qualifier list start).
OboChar = @{
    ("\\" ~ UniCodeChar)
  | (!("\\" | NewlineChar | "!" | "{") ~ UniCodeChar)
}

// An `OboChar` that does not start with a blank.
NonWsChar = @{ !WhitespaceChar ~ OboChar }


// 2.3 Line Termination

// End of a clause line: optional qualifier list, optional trailing comment,
// then the line break.
EOL = {
    QualifierList?
    ~ Comment?
    ~ nl
}

// A `!` comment running to the end of the line. The prefix (leading blanks
// and the `!`) is silent; only `CommentText` carries the comment body.
Comment = { CommentPrefix ~ CommentText }
CommentPrefix = _{ WhitespaceChar* ~ "!" }
CommentText = ${ (!NewlineChar ~ UniCodeChar)* }
// Silent wrapper used where a comment may stand on a line of its own.
CommentSilent = _{ Comment }

// Trailing qualifiers: `{key="value", key="value"}`.
// `Qualifier` is compound-atomic, so no blank may surround the `=`.
QualifierChar = @{ !("=" | "," | "}" | "{" | "\"") ~ NonWsChar }
QualifierId = @{ QualifierChar+ }
Qualifier = ${ QualifierId ~ "=" ~ QuotedString }
QualifierList = {
    "{"
    ~ Qualifier
    ~ ("," ~ Qualifier)*
    ~ "}"
}

// 2.4 Clause Values

// A double-quoted string, delimiters included. A backslash always escapes the
// character that follows it: `\"` does not terminate the string, and an
// escaped backslash (`\\`) directly before the closing quote does not swallow
// that quote.
QuotedString   = @{ "\"" ~ ( ("\\" ~ ANY) | (!"\"" ~ ANY) )* ~ "\"" }
// A run of OBO characters: stops at a line break, an unescaped `!` or an
// unescaped `{` (see `OboChar`).
UnquotedString = @{ OboChar+ }


// 2.5 Identifiers

// NB(@althonos): PEG ordered choice commits to the first alternative that
//                succeeds, even if that alternative only consumed a prefix of
//                the token. We therefore sometimes add a lookahead predicate
//                to an alternative so that it only succeeds when it consumes
//                the whole token.
//
//                For instance, without lookahead, '00-01' parsed by the
//                `IdLocal` rule would match `CanonicalIdLocal` on `00` and
//                leave `-01` as remaining input, but we actually want it to
//                match `NonCanonicalIdLocal` with no remaining input.

// Typed identifier wrappers. All of them accept exactly what `Id` accepts;
// they differ only in the rule name recorded in the parse tree.
ClassId = { Id }
RelationId = { Id }
InstanceId = { Id }
SynonymTypeId = { Id }
NamespaceId = { Id }
SubsetId = { Id }

// An IRI, delegated to the RFC 3987 rules.
Iri = { RFC3987_Iri }

// An identifier. Alternatives are tried in order: URL, then `prefix:local`,
// then an identifier containing no colon.
Id = ${
    UrlId
  | PrefixedId
  | UnprefixedId
}

// scheme "://" authority path, with optional "?query" and "#fragment".
UrlId = @{
    RFC3987_IriScheme ~ "://" ~ RFC3987_IriAuthority ~ RFC3987_IriPathAbempty
    ~ ("?" ~ RFC3987_IriQuery)?
    ~ ("#" ~ RFC3987_IriFragment)?
}

// One or more non-blank characters, none of which starts with `:`.
UnprefixedId = @{ (!":" ~ NonWsChar)+ }

// Prefix and local part joined by a single `:`.
PrefixedId = ${ IdPrefix ~ ":" ~ IdLocal }

// Prefix part of an identifier, also used on its own in header clauses
// (e.g. after `idspace:` and the `treat-xrefs-as-*:` tags).
IdPrefix             = ${ (CanonicalIdPrefix | NonCanonicalIdPrefix) }
// A canonical prefix is a letter followed by letters or underscores. The
// trailing predicate only lets this alternative succeed when it has consumed
// the whole prefix, i.e. when `NonCanonicalIdPrefix` could not consume any
// further character: the next input is `:`, a blank, a line break, `!`, `{`
// or the end of input. This keeps `GO` canonical both in `GO:0001` and in
// `idspace: GO http://...`.
CanonicalIdPrefix    = @{ AlphaChar ~ (AlphaChar | "_")* ~ !(!":" ~ NonWsChar) }
// Fallback: any (possibly empty) run of non-blank characters up to a `:`.
NonCanonicalIdPrefix = @{ (!":" ~ NonWsChar)* }

// Local part of a prefixed identifier.
IdLocal             = ${ (CanonicalIdLocal | NonCanonicalIdLocal) }
// A canonical local id is made of digits only. The trailing predicate only
// lets this alternative succeed when it has consumed the whole local id,
// i.e. when `NonCanonicalIdLocal` could not consume any further character:
// the next input is a blank, a line break, `!`, `{` or the end of input.
CanonicalIdLocal    = @{ ASCII_DIGIT+ ~ !NonWsChar }
// Fallback: any (possibly empty) run of non-blank characters.
NonCanonicalIdLocal = @{ NonWsChar* }


// 2.6 Xref Lists

// A standalone cross-reference: an identifier with an optional quoted
// description.
Xref = { Id ~ QuotedString? }

// Inside a bracketed list, an xref id is a run of non-blank characters that
// stops at a `,` or `]` (a backslash-escaped `\,` or `\]` does not stop it,
// because the escape starts with a backslash).
XrefChar = ${ !"," ~ !"]" ~ NonWsChar }
XrefId = @{ XrefChar+ }
XrefListItem = { XrefId ~ QuotedString? }
XrefList = {
    "["
    ~ XrefListItem?
    ~ ("," ~ XrefListItem)*
    ~ "]"
}

// 3 Obo Grammar

// 3.1 Obo Document Structure

// A whole document: one header frame, any number of entity frames, then the
// end of input.
OboDoc = {
    HeaderFrame
    ~ EntityFrame*
    ~ EOI
}
EntityFrame = {
    TermFrame
  | InstanceFrame
  | TypedefFrame
}

// NB(@althonos): for iterative parsers.
EntitySingle = _{ EntityFrame ~ EOI }


// 3.2 Obo Headers

// The header: newline-terminated lines that each hold a header clause, a
// comment, or nothing; then an optional final clause with no line break of
// its own; then any number of line breaks, each optionally followed by a
// comment.
HeaderFrame = {
    ((HeaderClause | CommentSilent)? ~ nl)*
    ~ HeaderClause?
    ~ (nl ~ CommentSilent?)*
}

// Header `date:` value: a `day:month:year` date followed by an `hour:minute`
// time. Date and time are each compound-atomic, so no blank is allowed
// around their `:` separators.
NaiveDateTime = { NaiveDate ~ NaiveTime }
NaiveDate = ${ NaiveDay ~ ":" ~ NaiveMonth ~ ":" ~ NaiveYear }
NaiveTime = ${ NaiveHour ~ ":" ~ NaiveMinute }

// 01-09, 10-29, 30-31
NaiveDay = @{
    ("0" ~ '1'..'9')
  | ('1'..'2' ~ '0'..'9')
  | ("3" ~ '0'..'1')
}
// 01-09, 10-12
NaiveMonth = @{
    ("0" ~ '1'..'9')
  | ("1" ~ '0'..'2')
}
// exactly four digits
NaiveYear = @{ Digit{4} }
// 00-19, 20-23
NaiveHour = @{
    ('0'..'1' ~ '0'..'9')
  | ("2" ~ '0'..'3')
}
// 00-59
NaiveMinute = @{ '0'..'5' ~ '0'..'9' }

// A single header clause: optional leading blanks, a tag, then the value
// shape that tag requires. The last alternative accepts any tag that is not
// one of the `Reserved` header tags, followed by `:` and a free-form value.
HeaderClause = {
    WhitespaceChar* ~ (
        (FormatVersionTag ~ UnquotedString)
      | (DataVersionTag ~ UnquotedString)
      | (DateTag ~ NaiveDateTime)
      | (SavedByTag ~ UnquotedString)
      | (AutoGeneratedByTag ~ UnquotedString)
      | (ImportTag ~ Import)
      | (SubsetdefTag ~ SubsetId ~ QuotedString)
      | (SynonymTypedefTag ~ SynonymTypeId ~ QuotedString ~ SynonymScope?)
      | (DefaultNamespaceTag ~ NamespaceId)
      | (IdspaceTag ~ IdPrefix ~ Iri ~ QuotedString?)
      | (NamespaceIdRuleTag ~ UnquotedString)
      | (TreatXrefsAsEquivalentTag ~ IdPrefix)
      | (TreatXrefsAsGenusDifferentiaTag ~ IdPrefix ~ RelationId ~ ClassId)
      | (TreatXrefsAsReverseGenusDifferentiaTag ~ IdPrefix ~ RelationId ~ ClassId)
      | (TreatXrefsAsRelationshipTag ~ IdPrefix ~ RelationId)
      | (TreatXrefsAsIsATag ~ IdPrefix)
      | (TreatXrefsAsHasSubclassTag ~ IdPrefix)
      // FIXME(@althonos): allow EOL
      | (PropertyValueTag ~ PropertyValue)
      | (RemarkTag ~ UnquotedString)
      | (OntologyTag ~ UnquotedString)
      | (OwlAxiomsTag ~ UnquotedString)
      | (Unreserved ~ ":" ~ UnquotedString)
    )
}

// Every tag that has a dedicated alternative in `HeaderClause`. Used by
// `Unreserved` as a negative lookahead.
Reserved = {
    FormatVersionTag | DataVersionTag | DateTag | SavedByTag
  | AutoGeneratedByTag | ImportTag | SubsetdefTag | SynonymTypedefTag
  | DefaultNamespaceTag | IdspaceTag | NamespaceIdRuleTag
  | TreatXrefsAsEquivalentTag
  | TreatXrefsAsGenusDifferentiaTag
  | TreatXrefsAsReverseGenusDifferentiaTag
  | TreatXrefsAsRelationshipTag
  | TreatXrefsAsIsATag
  | TreatXrefsAsHasSubclassTag
  | PropertyValueTag | RemarkTag | OntologyTag | OwlAxiomsTag
}

// A free-form header tag: must not begin with a reserved tag, and runs up to
// (not including) the first `:`.
Unreserved = @{
    !Reserved
    ~ (!":" ~ OboChar)+
}


// 3.3 Term Frames

// A `[Term]` frame: optional blank/comment lines, the `[Term]` marker on its
// own line, optional blank/comment lines, the mandatory `id:` clause, then
// any mix of term clause lines and blank/comment lines.
TermFrame = {
    (CommentSilent? ~ nl)*
    ~ WhitespaceChar* ~ "[Term]" ~ nl
    ~ (CommentSilent? ~ nl)*
    ~ WhitespaceChar* ~ "id:" ~ ClassId ~ EOL
    ~ (
        TermClauseLine
      | (CommentSilent? ~ nl)
    )*
}

// One term clause together with its end-of-line (qualifiers, comment, line
// break).
TermClauseLine = { TermClause ~ EOL }
// A clause allowed inside a `[Term]` frame: optional leading blanks, a tag,
// then the value shape that tag requires.
TermClause = {
    WhitespaceChar* ~ (
        (IsAnonymousTag ~ Boolean)
      | (NameTag ~ UnquotedString)
      | (NamespaceTag ~ NamespaceId)
      | (AltIdTag ~ Id)
      | (DefTag ~ Definition)
      | (CommentTag ~ UnquotedString)
      | (SubsetTag ~ SubsetId)
      | (SynonymTag ~ Synonym)
      | (XrefTag ~ Xref)
      | (BuiltinTag ~ Boolean)
      | (PropertyValueTag ~ PropertyValue)
      | (IsATag ~ ClassId)
      // two-identifier form is tried first, then the single-class form
      | (IntersectionOfTag ~ ((RelationId ~ ClassId) | ClassId))
      | (UnionOfTag ~ ClassId)
      | (EquivalentToTag ~ ClassId)
      | (DisjointFromTag ~ ClassId)
      | (RelationshipTag ~ RelationId ~ ClassId)
      | (IsObsoleteTag ~ Boolean)
      | (ReplacedByTag ~ ClassId)
      | (ConsiderTag ~ ClassId)
      | (CreatedByTag ~ UnquotedString)
      | (CreationDateTag ~ CreationDate)
    )
}


// 3.4 Typedef Frames

// A `[Typedef]` frame: optional blank/comment lines, the `[Typedef]` marker
// on its own line, optional blank/comment lines, the mandatory `id:` clause,
// then any mix of typedef clause lines and blank/comment lines.
//
// The frame identifier is tagged `RelationId`, consistently with every
// relation reference in `TypedefClause`. `RelationId` and `ClassId` are both
// defined as `{ Id }`, so the accepted input is the same; only the rule name
// recorded in the parse tree differs.
TypedefFrame = {
    (CommentSilent? ~ nl)*
    ~ WhitespaceChar* ~ "[Typedef]" ~ nl
    ~ (CommentSilent? ~ nl)*
    ~ WhitespaceChar* ~ "id:" ~ RelationId ~ EOL
    ~ (TypedefClauseLine | CommentSilent? ~ nl)*
}
// One typedef clause together with its end-of-line (qualifiers, comment, line
// break).
TypedefClauseLine = { TypedefClause ~ EOL }

// A clause allowed inside a `[Typedef]` frame: optional leading blanks, a
// tag, then the value shape that tag requires.
TypedefClause = {
    WhitespaceChar* ~ (
        (IsAnonymousTag ~ Boolean)
      | (NameTag ~ UnquotedString)
      | (NamespaceTag ~ NamespaceId)
      | (AltIdTag ~ Id)
      | (DefTag ~ Definition)
      | (CommentTag ~ UnquotedString)
      | (SubsetTag ~ SubsetId)
      | (SynonymTag ~ Synonym)
      | (XrefTag ~ Xref)
      | (PropertyValueTag ~ PropertyValue)
      | (DomainTag ~ ClassId)
      | (RangeTag ~ ClassId)
      | (BuiltinTag ~ Boolean)
      | (HoldsOverChainTag ~ RelationId ~ RelationId)
      | (IsAntiSymmetricTag ~ Boolean)
      | (IsCyclicTag ~ Boolean)
      | (IsReflexiveTag ~ Boolean)
      | (IsSymmetricTag ~ Boolean)
      | (IsAsymmetricTag ~ Boolean)
      | (IsTransitiveTag ~ Boolean)
      | (IsFunctionalTag ~ Boolean)
      | (IsInverseFunctionalTag ~ Boolean)
      | (IsATag ~ RelationId)
      | (IntersectionOfTag ~ RelationId)
      | (UnionOfTag ~ RelationId)
      | (EquivalentToTag ~ RelationId)
      | (DisjointFromTag ~ RelationId)
      | (InverseOfTag ~ RelationId)
      | (TransitiveOverTag ~ RelationId)
      | (EquivalentToChainTag ~ RelationId ~ RelationId)
      | (DisjointOverTag ~ RelationId)
      | (RelationshipTag ~ RelationId ~ RelationId)
      | (IsObsoleteTag ~ Boolean)
      | (ReplacedByTag ~ RelationId)
      | (ConsiderTag ~ Id)
      | (CreatedByTag ~ UnquotedString)
      | (CreationDateTag ~ CreationDate)
      | (ExpandAssertionToTag ~ QuotedString ~ XrefList)
      | (ExpandExpressionToTag ~ QuotedString ~ XrefList)
      | (IsMetadataTagTag ~ Boolean)
      | (IsClassLevelTag ~ Boolean)
    )
}


// 3.5 Instance Frames

// An `[Instance]` frame: optional blank/comment lines, the `[Instance]`
// marker on its own line, optional blank/comment lines, the mandatory `id:`
// clause, then any mix of instance clause lines and blank/comment lines.
InstanceFrame = {
    (CommentSilent? ~ nl)*
    ~ WhitespaceChar* ~ "[Instance]" ~ nl
    ~ (CommentSilent? ~ nl)*
    ~ WhitespaceChar* ~ "id:" ~ InstanceId ~ EOL
    ~ (
        InstanceClauseLine
      | (CommentSilent? ~ nl)
    )*
}

// One instance clause together with its end-of-line (qualifiers, comment,
// line break).
InstanceClauseLine = { InstanceClause ~ EOL }
// A clause allowed inside an `[Instance]` frame: optional leading blanks, a
// tag, then the value shape that tag requires.
InstanceClause = {
    WhitespaceChar* ~ (
        (IsAnonymousTag ~ Boolean)
      | (NameTag ~ UnquotedString)
      | (NamespaceTag ~ NamespaceId)
      | (AltIdTag ~ Id)
      | (DefTag ~ Definition)
      | (CommentTag ~ UnquotedString)
      | (SubsetTag ~ SubsetId)
      | (SynonymTag ~ Synonym)
      | (XrefTag ~ Xref)
      | (PropertyValueTag ~ PropertyValue)
      | (InstanceOfTag ~ ClassId)
      | (RelationshipTag ~ RelationId ~ InstanceId)
      | (CreatedByTag ~ UnquotedString)
      | (CreationDateTag ~ CreationDate)
      | (IsObsoleteTag ~ Boolean)
      | (ReplacedByTag ~ InstanceId)
      | (ConsiderTag ~ Id)
    )
}


// 3.6 Synonym scope

// The four synonym scope keywords.
ExactSynonymScope = { "EXACT" }
BroadSynonymScope = { "BROAD" }
NarrowSynonymScope = { "NARROW" }
RelatedSynonymScope = { "RELATED" }

// Any one scope keyword, as an atomic token.
SynonymScope = @{
    ExactSynonymScope
  | BroadSynonymScope
  | NarrowSynonymScope
  | RelatedSynonymScope
}

// A scope keyword that must be followed by at least one blank; the blank is
// only looked at, not consumed.
SynonymScopeSingle = @{ SynonymScope ~ &ws }

// `synonym:` value: quoted text, scope, then either an xref list directly or
// a synonym type id followed by an xref list.
Synonym = {
    QuotedString
    ~ SynonymScopeSingle
    ~ (XrefList | (SynonymTypeId ~ XrefList))
}

// 4.0 Misc

// `import:` value: tried as an IRI first, then as an identifier.
Import = ${
    Iri
  | Id
}

// `def:` value: quoted text followed by an xref list.
Definition = {
    QuotedString
    ~ XrefList
}

// WORKAROUND(@althonos): the 1.4 spec requires all property values to be
//                        quote-enclosed, but this is not done currently by the
//                        owlapi and owl2obo converters. As a workaround, we
//                        accept unquoted strings without whitespace as well
//                        as quoted strings for now.

// An unquoted literal target: a run of non-blank characters.
UnquotedPropertyValueTarget = @{ NonWsChar+ }

// `property_value:` value. The literal form (relation, value, datatype id)
// is tried first; if it fails, the resource form (relation, target id) is
// tried.
PropertyValue = {
    LiteralPropertyValue
  | ResourcePropertyValue
}
LiteralPropertyValue = {
    RelationId
    ~ (QuotedString | UnquotedPropertyValueTarget)
    ~ Id
}
ResourcePropertyValue = {
    RelationId
    ~ Id
}

// WORKAROUND(@althonos): the 1.4 spec requires that creation dates are marked
//                        in ISO8601 DateTime, but the 1.4 guide is vague and
//                        there are some cases in the wild where the tag
//                        value only contains an ISO8601 Date. To accommodate
//                        this, we try to parse as a DateTime first, and
//                        fall back to a Date if it fails.

// `creation_date:` value: a full date-time is tried first, then a bare date.
CreationDate = ${
    ISO8601_DateTime
  | ISO8601_Date
}