RFC 7230 URI Grammar
request-target = origin-form / absolute-form / authority-form / asterisk-form
-------------------------------------------------------------------------------
asterisk-form = "*"
-------------------------------------------------------------------------------
origin-form = absolute-path [ "?" query ]
absolute-path = 1*( "/" segment )
-------------------------------------------------------------------------------
authority-form = authority
-------------------------------------------------------------------------------
1. look for ':', '@', '?'
2. if none of them is found, you have an authority, text is `host`
3. if ':' is found, have either 'host', 'scheme', or 'userinfo'
* can only be host if: next four characters are port
* must be host if: text before ':' is empty, requires port
* if the next (at most) four characters are digits, then we have a host/port.
* if next character is '/' or there is none, then scheme
* otherwise try as scheme, fall back to userinfo if '@' is found
4. if '?' is found, have either 'host', 'scheme', or 'userinfo'
5. if '@' is found, have 'userinfo'
-------------------------------------------------------------------------------
absolute-form = absolute-URI
absolute-URI = scheme ":" hier-part [ "?" query ]
scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
hier-part = "//" authority path-abempty
/ path-absolute
/ path-rootless
/ path-empty
query = *( pchar / "/" / "?" )
authority = [ userinfo "@" ] host [ ":" port ]
userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
host = IP-literal / IPv4address / reg-name
port = *DIGIT
reg-name = *( unreserved / pct-encoded / sub-delims )
path-abempty = *( "/" segment )
path-absolute = "/" [ segment-nz *( "/" segment ) ]
path-noscheme = segment-nz-nc *( "/" segment )
path-rootless = segment-nz *( "/" segment )
path-empty = 0<pchar>
segment = *pchar
segment-nz = 1*pchar
segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
pct-encoded = "%" HEXDIG HEXDIG
sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
/ "*" / "+" / "," / ";" / "="
IP-literal = "[" ( IPv6address / IPvFuture ) "]"
IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
IPv6address = 6( h16 ":" ) ls32
/ "::" 5( h16 ":" ) ls32
/ [ h16 ] "::" 4( h16 ":" ) ls32
/ [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
/ [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
/ [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
/ [ *4( h16 ":" ) h16 ] "::" ls32
/ [ *5( h16 ":" ) h16 ] "::" h16
/ [ *6( h16 ":" ) h16 ] "::"
h16 = 1*4HEXDIG
ls32 = ( h16 ":" h16 ) / IPv4address
IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
dec-octet = DIGIT ; 0-9
/ %x31-39 DIGIT ; 10-99
/ "1" 2DIGIT ; 100-199
/ "2" %x30-34 DIGIT ; 200-249
/ "25" %x30-35 ; 250-255
ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
DIGIT = %x30-39 ; 0-9