// rdftk_iri 0.1.9
//
// This crate provides an implementation of the IRI and URI specifications.
// Documentation
// ------------------------------------------------------------------------------------------------
// RFC-3987: Internationalized Resource Identifiers (IRIs)
//    IRI            = scheme ":" ihier-part [ "?" iquery ]
//                     [ "#" ifragment ]

// A complete IRI: scheme, ":", hierarchical part, then an optional
// "?"-prefixed query and an optional "#"-prefixed fragment.
iri = { scheme ~ ":" ~ ihier_part ~ ("?" ~ iquery)? ~ ("#" ~ ifragment)? }

//    ihier-part     = "//" iauthority ipath-abempty
//                     / ipath-absolute
//                     / ipath-rootless
//                     / ipath-empty

// Hierarchical part of an IRI: either "//" authority plus path, or one of
// the authority-less path forms, tried in the order listed.
ihier_part = {
    ("//" ~ iauthority ~ ipath_abempty) | ipath_absolute | ipath_rootless | ipath_empty
}

//    IRI-reference  = IRI / irelative-ref

// Either a full IRI or, failing that, a relative reference.
iri_reference = { iri | irelative_ref }

//    absolute-IRI   = scheme ":" ihier-part [ "?" iquery ]

// An IRI with an optional query but no fragment part.
absolute_iri = { scheme ~ ":" ~ ihier_part ~ ("?" ~ iquery)? }

//    irelative-ref  = irelative-part [ "?" iquery ] [ "#" ifragment ]

// Relative reference: relative part followed by optional query and fragment.
irelative_ref = { irelative_part ~ ("?" ~ iquery)? ~ ("#" ~ ifragment)? }

//    irelative-part = "//" iauthority ipath-abempty
//                     / ipath-absolute
//                     / ipath-noscheme
//                     / ipath-empty

// Relative part: "//" authority plus path, or an absolute, scheme-less, or
// empty path, tried in the order listed.
irelative_part = {
    ("//" ~ iauthority ~ ipath_abempty) | ipath_absolute | ipath_noscheme | ipath_empty
}

//    iauthority     = [ iuserinfo "@" ] ihost [ ":" port ]

// Authority: optional "userinfo@" prefix, a host, and an optional ":port".
iauthority = { (iuser_info ~ "@")? ~ ihost ~ (":" ~ port)? }

//    iuserinfo      = *( iunreserved / pct-encoded / sub-delims / ":" )

// User information: zero or more of iunreserved, pct-encoded, sub-delims
// or ":". The ABNF is a `*( ... )` repetition; without the `*` this rule
// would accept exactly one character.
iuser_info = {
    ( iunreserved | pct_encoded | sub_delims | ":" )*
}

//    ihost          = IP-literal / IPv4address / ireg-name

// Host: bracketed IP literal, then IPv4 address, then registered name,
// tried in that order.
ihost = { ip_literal | ipv4_address | ireg_name }

//    ireg-name      = *( iunreserved / pct-encoded / sub-delims )

// Registered name: any number (including zero) of iunreserved,
// pct-encoded or sub-delims characters.
ireg_name = { (iunreserved | pct_encoded | sub_delims)* }

//    ipath          = ipath-abempty   ; begins with "/" or is empty
//                   / ipath-absolute  ; begins with "/" but not "//"
//                   / ipath-noscheme  ; begins with a non-colon segment
//                   / ipath-rootless  ; begins with a segment
//                   / ipath-empty     ; zero characters

// Any IRI path form. ipath_abempty from the ABNF above is not listed here.
// NOTE(review): presumably because it can match zero characters and would
// pre-empt the other alternatives in an ordered choice; confirm.
ipath = { ipath_absolute | ipath_noscheme | ipath_rootless | ipath_empty }

//    ipath-abempty  = *( "/" isegment )

// Zero or more "/"-prefixed segments.
ipath_abempty = { ("/" ~ isegment)* }

//    ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ]

// A leading "/", optionally followed by a non-empty segment and further
// "/"-prefixed segments.
ipath_absolute = { "/" ~ (isegment_nz ~ ("/" ~ isegment)*)? }

//    ipath-noscheme = isegment-nz-nc *( "/" isegment )

// A colon-free non-empty first segment, then any "/"-prefixed segments.
ipath_noscheme = { isegment_nz_nc ~ ("/" ~ isegment)* }

//    ipath-rootless = isegment-nz *( "/" isegment )

// A non-empty first segment, then any "/"-prefixed segments.
ipath_rootless = { isegment_nz ~ ("/" ~ isegment)* }

//    ipath-empty    = 0<ipchar>

// The empty path: matches the empty string.
ipath_empty = { "" }

//    isegment       = *ipchar

// A possibly empty run of ipchar.
isegment = { ipchar* }

//    isegment-nz    = 1*ipchar

// A run of one or more ipchar.
isegment_nz = { ipchar+ }

//    isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
//                     / "@" )
//                     ; non-zero-length segment without any colon ":"

// One or more segment characters, with ":" excluded from the allowed set.
isegment_nz_nc = { (iunreserved | pct_encoded | sub_delims | "@")+ }

//    ipchar         = iunreserved / pct-encoded / sub-delims / ":"
//                     / "@"

// A single IRI path character. The ABNF permits both ":" and "@";
// the "@" alternative was previously missing from this rule.
ipchar = {
    iunreserved | pct_encoded | sub_delims | ":" | "@"
}

//    iquery         = *( ipchar / iprivate / "/" / "?" )

// Query: any number of ipchar, iprivate, "/" or "?" characters.
iquery = { (ipchar | iprivate | "/" | "?")* }

//    ifragment      = *( ipchar / "/" / "?" )

// Fragment: any number of ipchar, "/" or "?" characters.
ifragment = { (ipchar | "/" | "?")* }

//    iunreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar

// Unreserved IRI character: letter, digit, one of "-" "." "_" "~",
// or a ucschar code point.
iunreserved = {
    ALPHA
    | DIGIT
    | "-"
    | "."
    | "_"
    | "~"
    | ucschar
}

//    ucschar        = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
//                     / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
//                     / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
//                     / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
//                     / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
//                     / %xD0000-DFFFD / %xE1000-EFFFD

// Non-ASCII code point ranges permitted in IRIs, one range per line in the
// same order as the ABNF above.
ucschar = {
    '\u{A0}'..'\u{D7FF}'
    | '\u{F900}'..'\u{FDCF}'
    | '\u{FDF0}'..'\u{FFEF}'
    | '\u{10000}'..'\u{1FFFD}'
    | '\u{20000}'..'\u{2FFFD}'
    | '\u{30000}'..'\u{3FFFD}'
    | '\u{40000}'..'\u{4FFFD}'
    | '\u{50000}'..'\u{5FFFD}'
    | '\u{60000}'..'\u{6FFFD}'
    | '\u{70000}'..'\u{7FFFD}'
    | '\u{80000}'..'\u{8FFFD}'
    | '\u{90000}'..'\u{9FFFD}'
    | '\u{A0000}'..'\u{AFFFD}'
    | '\u{B0000}'..'\u{BFFFD}'
    | '\u{C0000}'..'\u{CFFFD}'
    | '\u{D0000}'..'\u{DFFFD}'
    | '\u{E1000}'..'\u{EFFFD}'
}

//    iprivate       = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD

// Private-use code point ranges (referenced by iquery only).
iprivate = {
    '\u{E000}'..'\u{F8FF}'
    | '\u{F0000}'..'\u{FFFFD}'
    | '\u{100000}'..'\u{10FFFD}'
}

// ------------------------------------------------------------------------------------------------
// RFC-3986: Uniform Resource Identifier (URI): Generic Syntax
//
//    URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]

// A complete URI: scheme, ":", hierarchical part, then an optional
// "?"-prefixed query and an optional "#"-prefixed fragment.
uri = { scheme ~ ":" ~ hier_part ~ ("?" ~ query)? ~ ("#" ~ fragment)? }

//    hier-part     = "//" authority path-abempty
//                  / path-absolute
//                  / path-rootless
//                  / path-empty

// Hierarchical part of a URI: either "//" authority plus path, or one of
// the authority-less path forms, tried in the order listed.
hier_part = {
    ("//" ~ authority ~ path_abempty) | path_absolute | path_rootless | path_empty
}

//    URI-reference = URI / relative-ref

// Either a full URI or, failing that, a relative reference.
uri_reference = { uri | relative_ref }

//    absolute-URI  = scheme ":" hier-part [ "?" query ]

// A URI with an optional query but no fragment part.
absolute_uri = { scheme ~ ":" ~ hier_part ~ ("?" ~ query)? }

//    relative-ref  = relative-part [ "?" query ] [ "#" fragment ]

// Relative reference: relative part followed by optional query and fragment.
relative_ref = { relative_part ~ ("?" ~ query)? ~ ("#" ~ fragment)? }

//    relative-part = "//" authority path-abempty
//                  / path-absolute
//                  / path-noscheme
//                  / path-empty

// Relative part: "//" authority plus path, or an absolute, scheme-less, or
// empty path, tried in the order listed.
relative_part = {
    ("//" ~ authority ~ path_abempty) | path_absolute | path_noscheme | path_empty
}

//    scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )

// Scheme name: a letter followed by any mix of letters, digits,
// "+", "-" and ".".
scheme = { ALPHA ~ (ALPHA | DIGIT | "+" | "-" | ".")* }

//    authority     = [ userinfo "@" ] host [ ":" port ]

// Authority: optional "userinfo@" prefix, a host, and an optional ":port".
// The whole sequence is itself wrapped in an optional group.
authority = {
    ( (user_info ~ "@")? ~ host ~ (":" ~ port)? )?
}

//    userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )

// User information: any number of unreserved, pct-encoded, sub-delims
// or ":" characters.
user_info = { (unreserved | pct_encoded | sub_delims | ":")* }

//    host          = IP-literal / IPv4address / reg-name

// Host: bracketed IP literal, then IPv4 address, then registered name,
// tried in that order.
host = { ip_literal | ipv4_address | reg_name }

//    port          = *DIGIT

// Port: a possibly empty run of decimal digits.
port = { DIGIT* }

//    IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"

// Square-bracketed literal holding an IPv6 address or an IPvFuture form.
ip_literal = { "[" ~ (ipv6_address | ipv_future) ~ "]" }

//    IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )

// Future IP literal format: "v", one or more hex digits, ".", then one or
// more unreserved / sub-delims / ":" characters.
// ABNF quoted literals are case-insensitive, so the version marker is
// matched with pest's case-insensitive string (`^"v"`) to accept "V" too.
ipv_future = {
    ^"v" ~ HEXDIG+ ~ "." ~ ( unreserved | sub_delims | ":" )+
}

//    IPv6address   =                            6( h16 ":" ) ls32
//                  /                       "::" 5( h16 ":" ) ls32
//                  / [               h16 ] "::" 4( h16 ":" ) ls32
//                  / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
//                  / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
//                  / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
//                  / [ *4( h16 ":" ) h16 ] "::"              ls32
//                  / [ *5( h16 ":" ) h16 ] "::"              h16
//                  / [ *6( h16 ":" ) h16 ] "::"

// IPv6 address. Each alternative below transcribes, in order, one line of
// the ABNF above: the first is the full eight-group form, the rest are the
// "::"-compressed forms with a decreasing number of groups after the "::".
// NOTE(review): pest repetitions are greedy and do not backtrack into a
// repetition, so a prefix such as `( h16 ~ ":" ){0,1} ~ h16` may consume
// the first ":" of a following "::" and make the optional prefix match
// nothing. Verify against compressed inputs such as "1::2" before relying
// on this rule.
ipv6_address = {
    ( h16 ~ ":" ){6} ~ ls32
    | "::" ~ ( h16 ~ ":" ){5} ~ ls32
    | ( h16 )? ~ "::" ~ ( h16 ~ ":" ){4} ~ ls32
    | ( ( h16 ~ ":" ){0,1} ~ h16 )? ~ "::" ~ ( h16 ~ ":" ){3} ~ ls32
    | ( ( h16 ~ ":" ){0,2} ~ h16 )? ~ "::" ~ ( h16 ~ ":" ){2} ~ ls32
    | ( ( h16 ~ ":" ){0,3} ~ h16 )? ~ "::" ~ h16 ~ ":" ~ ls32
    | ( ( h16 ~ ":" ){0,4} ~ h16 )? ~ "::" ~ ls32
    | ( ( h16 ~ ":" ){0,5} ~ h16 )? ~ "::" ~ h16
    | ( ( h16 ~ ":" ){0,6} ~ h16 )? ~ "::"
}

//    h16           = 1*4HEXDIG

// One 16-bit address group: between one and four hex digits.
h16 = { HEXDIG{1, 4} }

//    ls32          = ( h16 ":" h16 ) / IPv4address

// Final 32 bits of an IPv6 address: two h16 groups joined by ":", or
// failing that an embedded IPv4 address.
ls32 = { (h16 ~ ":" ~ h16) | ipv4_address }

//    IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet

// Dotted-quad IPv4 address: four dec_octet values separated by ".".
ipv4_address = {
    dec_octet ~ "."
    ~ dec_octet ~ "."
    ~ dec_octet ~ "."
    ~ dec_octet
}

//    dec-octet     = DIGIT                 ; 0-9
//                  / %x31-39 DIGIT         ; 10-99
//                  / "1" 2DIGIT            ; 100-199
//                  / "2" %x30-34 DIGIT     ; 200-249
//                  / "25" %x30-35          ; 250-255

// Decimal octet, 0-255.
// pest's `|` is an ordered choice that commits to the first alternative
// that matches, so the longest forms must come first; with the single
// DIGIT branch first, "255" would match only its leading "2".
// The 100-199 branch takes two digits after "1", as in the ABNF
// (`"1" 2DIGIT`).
dec_octet = {
    "25" ~ ( '0'..'5' )              // 250-255
    | "2" ~ ( '0'..'4' ) ~ DIGIT     // 200-249
    | "1" ~ DIGIT{2}                 // 100-199
    | ASCII_NONZERO_DIGIT ~ DIGIT    // 10-99
    | DIGIT                          // 0-9
}

//    reg-name      = *( unreserved / pct-encoded / sub-delims )

// Registered name: any number (including zero) of unreserved,
// pct-encoded or sub-delims characters.
reg_name = { (unreserved | pct_encoded | sub_delims)* }

//    path          = path-abempty    ; begins with "/" or is empty
//                  / path-absolute   ; begins with "/" but not "//"
//                  / path-noscheme   ; begins with a non-colon segment
//                  / path-rootless   ; begins with a segment
//                  / path-empty      ; zero characters

// Any URI path form. path_abempty from the ABNF above is not listed here.
// NOTE(review): presumably because it can match zero characters and would
// pre-empt the other alternatives in an ordered choice; confirm.
path = { path_absolute | path_noscheme | path_rootless | path_empty }

//    path-abempty  = *( "/" segment )

// Zero or more "/"-prefixed segments.
path_abempty = { ("/" ~ segment)* }

//    path-absolute = "/" [ segment-nz *( "/" segment ) ]

// A leading "/", optionally followed by a non-empty segment and further
// "/"-prefixed segments.
path_absolute = { "/" ~ (segment_nz ~ ("/" ~ segment)*)? }

//    path-noscheme = segment-nz-nc *( "/" segment )

// A colon-free non-empty first segment, then any "/"-prefixed segments.
path_noscheme = { segment_nz_nc ~ ("/" ~ segment)* }

//    path-rootless = segment-nz *( "/" segment )

// A non-empty first segment, then any "/"-prefixed segments.
path_rootless = { segment_nz ~ ("/" ~ segment)* }

//    path-empty    = 0<pchar>

// The empty path: matches the empty string.
path_empty = {
    ""
}

//    segment       = *pchar

// A possibly empty run of pchar.
segment = { pchar* }

//    segment-nz    = 1*pchar

// A run of one or more pchar.
segment_nz = { pchar+ }

//    segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
//                  ; non-zero-length segment without any colon ":"

// One or more segment characters, with ":" excluded from the allowed set.
segment_nz_nc = { (unreserved | pct_encoded | sub_delims | "@")+ }

//    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"

// A single URI path character.
pchar = {
    unreserved
    | pct_encoded
    | sub_delims
    | ":"
    | "@"
}

//    query         = *( pchar / "/" / "?" )

// Query: any number of pchar, "/" or "?" characters.
query = { (pchar | "/" | "?")* }

//    fragment      = *( pchar / "/" / "?" )

// Fragment: any number of pchar, "/" or "?" characters.
fragment = { (pchar | "/" | "?")* }

//    pct-encoded   = "%" HEXDIG HEXDIG

// Percent-encoded octet: "%" followed by exactly two hex digits.
pct_encoded = { "%" ~ HEXDIG ~ HEXDIG }

//    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"

// Unreserved URI character: letter, digit, or one of "-" "." "_" "~".
unreserved = {
    ALPHA
    | DIGIT
    | "-"
    | "."
    | "_"
    | "~"
}

//    reserved      = gen-delims / sub-delims

// Reserved character: a general delimiter or a sub-delimiter.
reserved = { gen_delims | sub_delims }

//    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"

// General delimiters. The second alternative is "/" as in the ABNF;
// it was previously written as "|", which is not a gen-delim.
gen_delims = {
    ":" | "/" | "?" | "#" | "[" | "]" | "@"
}

//    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
//                  / "*" / "+" / "," / ";" / "="

// Sub-delimiters, laid out in the same two groups as the ABNF above.
sub_delims = {
    "!" | "$" | "&" | "'" | "(" | ")"
    | "*" | "+" | "," | ";" | "="
}

//

// ------------------------------------------------------------------------------------------------
// RFC-2234: Augmented BNF for Syntax Specifications: ABNF
//
//    ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z

// ASCII letter, upper or lower case.
ALPHA = { ASCII_ALPHA_LOWER | ASCII_ALPHA_UPPER }

//    BIT            =  "0" / "1"

// A single binary digit.
BIT = { '0'..'1' }

//    CHAR           =  %x01-7F
//                           ; any 7-bit US-ASCII character, excluding NUL

// Any 7-bit US-ASCII character except NUL (%x01-7F).
// An explicit range is used because pest's built-in ASCII rule also
// matches U+0000, which the ABNF excludes.
CHAR = {
    '\u{01}'..'\u{7F}'
}

//    CR             =  %x0D
//                           ; carriage return

// Carriage return (%x0D).
CR = {
    "\r"
}

//    CRLF           =  CR LF
//                           ; Internet standard newline

// Carriage return immediately followed by line feed.
CRLF = { CR ~ LF }

//    CTL            =  %x00-1F / %x7F
//                           ; controls

// Control characters: U+0000 through U+001F, plus U+007F.
CTL = {
    '\u{00}'..'\u{1F}'
    | '\u{7F}'..'\u{7F}'
}

//    DIGIT          =  %x30-39
//                           ; 0-9

// Decimal digit 0-9.
DIGIT = { '0'..'9' }

//    DQUOTE         =  %x22
//                           ; " (Double Quote)

// The double-quote character (%x22).
DQUOTE = { "\"" }

//    HEXDIG         =  DIGIT / "A" / "B" / "C" / "D" / "E" / "F"

// Hexadecimal digit. ABNF quoted literals are case-insensitive, so
// "A".."F" in the ABNF also covers "a".."f"; pest string literals are
// case-sensitive, so the built-in ASCII_HEX_DIGIT (0-9, a-f, A-F) is used
// to accept both cases (e.g. "%3a" as well as "%3A").
HEXDIG = {
    ASCII_HEX_DIGIT
}

//    HTAB           =  %x09
//                           ; horizontal tab

// Horizontal tab (%x09).
HTAB = { "\t" }

//    LF             =  %x0A
//                           ; linefeed

// Line feed (%x0A).
LF = { "\n" }

//    LWSP           =  *(WSP / CRLF WSP)
//                           ; linear white space (past newline)

// Linear white space: any number of WSP, or CRLF followed by WSP.
LWSP = { (WSP | (CRLF ~ WSP))* }

//    OCTET          =  %x00-FF
//                           ; 8 bits of data


//    SP             =  %x20
//                        ; space

// The space character (%x20).
SP = { " " }

//    VCHAR          =  %x21-7E
//                           ; visible (printing) characters

// Visible (printing) ASCII characters, U+0021 through U+007E.
VCHAR = { '\u{21}'..'\u{7E}' }

//    WSP            =  SP / HTAB
//                           ; white space

// White space: a space or a horizontal tab.
WSP = { SP | HTAB }