use crate::{
error::{err, Error},
fmt::{
offset::{self, ParsedOffset},
Parsed,
},
tz::{TimeZone, TimeZoneDatabase},
util::{escape, parse},
};
/// The result of parsing the bracketed annotations (`[...]`) that this
/// module's error messages refer to as RFC 9557 annotations.
///
/// Only a time zone annotation is retained. Other key/value annotations are
/// validated by `Parser::parse` but their contents are not stored here.
#[derive(Debug)]
pub(crate) struct ParsedAnnotations<'i> {
/// The bytes of the original input that were consumed while parsing
/// annotations. This is empty when no annotations were present.
// NOTE(review): `dead_code` is presumably allowed because this field is
// only ever observed through the derived `Debug` output — confirm.
#[allow(dead_code)]
input: escape::Bytes<'i>,
/// The time zone annotation, if the first annotation was one. This is
/// `None` when there were no annotations or when the first annotation was
/// a generic `key=value` annotation.
time_zone: Option<ParsedTimeZone<'i>>,
}
impl<'i> ParsedAnnotations<'i> {
    /// Returns a value representing "no annotations at all": an empty
    /// consumed input and no time zone.
    pub(crate) fn none() -> ParsedAnnotations<'static> {
        ParsedAnnotations { time_zone: None, input: escape::Bytes(&[]) }
    }

    /// Converts the parsed time zone annotation, if any, into a `TimeZone`.
    ///
    /// The returned boolean is the annotation's critical flag (`true` when
    /// the annotation was written with a leading `!`).
    ///
    /// Returns `Ok(None)` when no time zone annotation was parsed.
    ///
    /// # Errors
    ///
    /// Returns an error when a named time zone cannot be found in `db`, or
    /// when a parsed offset cannot be converted to an actual offset.
    pub(crate) fn to_time_zone(
        &self,
        db: &TimeZoneDatabase,
    ) -> Result<Option<(TimeZone, bool)>, Error> {
        let resolved = match self.time_zone {
            None => return Ok(None),
            Some(ParsedTimeZone::Named { critical, name }) => {
                // The database error is re-rendered through its `Display`
                // output rather than being propagated as-is.
                let tz = db.get(name).map_err(|err| err!("{}", err))?;
                (tz, critical)
            }
            Some(ParsedTimeZone::Offset { critical, ref offset }) => {
                (TimeZone::fixed(offset.to_offset()?), critical)
            }
        };
        Ok(Some(resolved))
    }
}
/// A time zone annotation as it appeared in the input, before being
/// resolved to a `TimeZone`.
#[derive(Debug)]
enum ParsedTimeZone<'i> {
/// A time zone identified by name, e.g. `[America/New_York]`.
Named {
/// Whether the annotation was prefixed with the critical flag (`!`).
critical: bool,
/// The name exactly as written in the input (borrowed from it),
/// including any `/` separators between components.
name: &'i str,
},
/// A time zone given as a numeric offset, e.g. `[-05]` or `[+05:12]`.
Offset {
/// Whether the annotation was prefixed with the critical flag (`!`).
critical: bool,
/// The offset as produced by the offset parser.
offset: ParsedOffset,
},
}
/// A parser for the bracketed annotations that may trail a datetime string.
///
/// The parser currently carries no configuration; construct it with
/// `Parser::new`.
#[derive(Debug)]
pub(crate) struct Parser {
/// A private zero-sized field.
// NOTE(review): presumably here so the struct can only be built via
// `Parser::new`, leaving room to add options later — confirm.
_priv: (),
}
impl Parser {
pub(crate) const fn new() -> Parser {
Parser { _priv: () }
}
pub(crate) fn parse<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, ParsedAnnotations<'i>>, Error> {
let mkslice = parse::slicer(input);
let Parsed { value: time_zone, mut input } =
self.parse_time_zone_annotation(input)?;
loop {
let Parsed { value: did_consume, input: unconsumed } =
self.parse_annotation(input)?;
if !did_consume {
break;
}
input = unconsumed;
}
let value = ParsedAnnotations {
input: escape::Bytes(mkslice(input)),
time_zone,
};
Ok(Parsed { value, input })
}
fn parse_time_zone_annotation<'i>(
&self,
mut input: &'i [u8],
) -> Result<Parsed<'i, Option<ParsedTimeZone<'i>>>, Error> {
let unconsumed = input;
if input.is_empty() || input[0] != b'[' {
return Ok(Parsed { value: None, input: unconsumed });
}
input = &input[1..];
let critical = input.starts_with(b"!");
if critical {
input = &input[1..];
}
if input.starts_with(b"+") || input.starts_with(b"-") {
const P: offset::Parser =
offset::Parser::new().zulu(false).subminute(false);
let Parsed { value: offset, input } = P.parse(input)?;
let Parsed { input, .. } =
self.parse_tz_annotation_close(input)?;
let value = Some(ParsedTimeZone::Offset { critical, offset });
return Ok(Parsed { value, input });
}
let mkiana = parse::slicer(input);
let Parsed { mut input, .. } =
self.parse_tz_annotation_iana_name(input)?;
if input.starts_with(b"=") {
return Ok(Parsed { value: None, input: unconsumed });
}
while input.starts_with(b"/") {
input = &input[1..];
let Parsed { input: unconsumed, .. } =
self.parse_tz_annotation_iana_name(input)?;
input = unconsumed;
}
let iana_name = core::str::from_utf8(mkiana(input)).expect("ASCII");
let time_zone =
Some(ParsedTimeZone::Named { critical, name: iana_name });
let Parsed { input, .. } = self.parse_tz_annotation_close(input)?;
Ok(Parsed { value: time_zone, input })
}
fn parse_annotation<'i>(
&self,
mut input: &'i [u8],
) -> Result<Parsed<'i, bool>, Error> {
if input.is_empty() || input[0] != b'[' {
return Ok(Parsed { value: false, input });
}
input = &input[1..];
let critical = input.starts_with(b"!");
if critical {
input = &input[1..];
}
let Parsed { value: key, input } = self.parse_annotation_key(input)?;
let Parsed { input, .. } = self.parse_annotation_separator(input)?;
let Parsed { input, .. } = self.parse_annotation_values(input)?;
let Parsed { input, .. } = self.parse_annotation_close(input)?;
if critical {
return Err(err!(
"found unsupported RFC 9557 annotation with key {key:?} \
with the critical flag ('!') set",
key = escape::Bytes(key),
));
}
Ok(Parsed { value: true, input })
}
fn parse_tz_annotation_iana_name<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, &'i [u8]>, Error> {
let mkname = parse::slicer(input);
let Parsed { mut input, .. } =
self.parse_tz_annotation_leading_char(input)?;
loop {
let Parsed { value: did_consume, input: unconsumed } =
self.parse_tz_annotation_char(input);
if !did_consume {
break;
}
input = unconsumed;
}
Ok(Parsed { value: mkname(input), input })
}
fn parse_annotation_key<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, &'i [u8]>, Error> {
let mkkey = parse::slicer(input);
let Parsed { mut input, .. } =
self.parse_annotation_key_leading_char(input)?;
loop {
let Parsed { value: did_consume, input: unconsumed } =
self.parse_annotation_key_char(input);
if !did_consume {
break;
}
input = unconsumed;
}
Ok(Parsed { value: mkkey(input), input })
}
fn parse_annotation_values<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, ()>, Error> {
let Parsed { mut input, .. } = self.parse_annotation_value(input)?;
while input.starts_with(b"-") {
input = &input[1..];
let Parsed { input: unconsumed, .. } =
self.parse_annotation_value(input)?;
input = unconsumed;
}
Ok(Parsed { value: (), input })
}
fn parse_annotation_value<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, &'i [u8]>, Error> {
let mkvalue = parse::slicer(input);
let Parsed { mut input, .. } =
self.parse_annotation_value_leading_char(input)?;
loop {
let Parsed { value: did_consume, input: unconsumed } =
self.parse_annotation_value_char(input);
if !did_consume {
break;
}
input = unconsumed;
}
let value = mkvalue(input);
Ok(Parsed { value, input })
}
fn parse_tz_annotation_leading_char<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, ()>, Error> {
if input.is_empty() {
return Err(err!(
"expected the start of an RFC 9557 annotation or IANA \
time zone component name, but found end of input instead",
));
}
if !matches!(input[0], b'_' | b'.' | b'A'..=b'Z' | b'a'..=b'z') {
return Err(err!(
"expected ASCII alphabetic byte (or underscore or period) \
at the start of an RFC 9557 annotation or time zone \
component name, but found {:?} instead",
escape::Byte(input[0]),
));
}
Ok(Parsed { value: (), input: &input[1..] })
}
fn parse_tz_annotation_char<'i>(
&self,
input: &'i [u8],
) -> Parsed<'i, bool> {
let is_tz_annotation_char = |byte| {
matches!(
byte,
b'_' | b'.' | b'+' | b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z',
)
};
if input.is_empty() || !is_tz_annotation_char(input[0]) {
return Parsed { value: false, input };
}
Parsed { value: true, input: &input[1..] }
}
fn parse_annotation_key_leading_char<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, ()>, Error> {
if input.is_empty() {
return Err(err!(
"expected the start of an RFC 9557 annotation key, \
but found end of input instead",
));
}
if !matches!(input[0], b'_' | b'a'..=b'z') {
return Err(err!(
"expected lowercase alphabetic byte (or underscore) \
at the start of an RFC 9557 annotation key, \
but found {:?} instead",
escape::Byte(input[0]),
));
}
Ok(Parsed { value: (), input: &input[1..] })
}
fn parse_annotation_key_char<'i>(
&self,
input: &'i [u8],
) -> Parsed<'i, bool> {
let is_annotation_key_char =
|byte| matches!(byte, b'_' | b'-' | b'0'..=b'9' | b'a'..=b'z');
if input.is_empty() || !is_annotation_key_char(input[0]) {
return Parsed { value: false, input };
}
Parsed { value: true, input: &input[1..] }
}
fn parse_annotation_value_leading_char<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, ()>, Error> {
if input.is_empty() {
return Err(err!(
"expected the start of an RFC 9557 annotation value, \
but found end of input instead",
));
}
if !matches!(input[0], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') {
return Err(err!(
"expected alphanumeric ASCII byte \
at the start of an RFC 9557 annotation value, \
but found {:?} instead",
escape::Byte(input[0]),
));
}
Ok(Parsed { value: (), input: &input[1..] })
}
fn parse_annotation_value_char<'i>(
&self,
input: &'i [u8],
) -> Parsed<'i, bool> {
let is_annotation_value_char =
|byte| matches!(byte, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z');
if input.is_empty() || !is_annotation_value_char(input[0]) {
return Parsed { value: false, input };
}
Parsed { value: true, input: &input[1..] }
}
fn parse_annotation_separator<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, ()>, Error> {
if input.is_empty() {
return Err(err!(
"expected an '=' after parsing an RFC 9557 annotation key, \
but found end of input instead",
));
}
if input[0] != b'=' {
return Err(if input[0] == b'/' {
err!(
"expected an '=' after parsing an RFC 9557 annotation \
key, but found / instead (time zone annotations must \
come first)",
)
} else {
err!(
"expected an '=' after parsing an RFC 9557 annotation \
key, but found {:?} instead",
escape::Byte(input[0]),
)
});
}
Ok(Parsed { value: (), input: &input[1..] })
}
fn parse_annotation_close<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, ()>, Error> {
if input.is_empty() {
return Err(err!(
"expected an ']' after parsing an RFC 9557 annotation key \
and value, but found end of input instead",
));
}
if input[0] != b']' {
return Err(err!(
"expected an ']' after parsing an RFC 9557 annotation key \
and value, but found {:?} instead",
escape::Byte(input[0]),
));
}
Ok(Parsed { value: (), input: &input[1..] })
}
fn parse_tz_annotation_close<'i>(
&self,
input: &'i [u8],
) -> Result<Parsed<'i, ()>, Error> {
if input.is_empty() {
return Err(err!(
"expected an ']' after parsing an RFC 9557 time zone \
annotation, but found end of input instead",
));
}
if input[0] != b']' {
return Err(err!(
"expected an ']' after parsing an RFC 9557 time zone \
annotation, but found {:?} instead",
escape::Byte(input[0]),
));
}
Ok(Parsed { value: (), input: &input[1..] })
}
}
// Snapshot tests (using `insta` inline snapshots) for the annotation parser.
// The `ok_*` tests pin the `Debug` output of successful parses and the
// `err_*` tests pin the exact error messages.
#[cfg(test)]
mod tests {
use super::*;
// Resolves parsed time zone annotations through the crate's time zone
// database: named zones (with and without the critical flag, and with a
// lowercase spelling) and fixed offsets at the +/-25:59 extremes.
#[test]
fn ok_time_zone() {
// Skip when the database is known to be empty.
// NOTE(review): presumably because named lookups cannot succeed
// then — confirm.
if crate::tz::db().is_definitively_empty() {
return;
}
let p = |input| {
Parser::new()
.parse(input)
.unwrap()
.value
.to_time_zone(crate::tz::db())
.unwrap()
};
insta::assert_debug_snapshot!(p(b"[America/New_York]"), @r###"
Some(
(
TimeZone(
TZif(
"America/New_York",
),
),
false,
),
)
"###);
insta::assert_debug_snapshot!(p(b"[!America/New_York]"), @r###"
Some(
(
TimeZone(
TZif(
"America/New_York",
),
),
true,
),
)
"###);
insta::assert_debug_snapshot!(p(b"[america/new_york]"), @r###"
Some(
(
TimeZone(
TZif(
"America/New_York",
),
),
false,
),
)
"###);
insta::assert_debug_snapshot!(p(b"[+25:59]"), @r###"
Some(
(
TimeZone(
Fixed(
25:59:00,
),
),
false,
),
)
"###);
insta::assert_debug_snapshot!(p(b"[-25:59]"), @r###"
Some(
(
TimeZone(
Fixed(
-25:59:00,
),
),
false,
),
)
"###);
}
// Input that does not begin with `[` parses successfully, yields no
// annotations and is left entirely unconsumed.
#[test]
fn ok_empty() {
let p = |input| Parser::new().parse(input).unwrap();
insta::assert_debug_snapshot!(p(b""), @r###"
Parsed {
value: ParsedAnnotations {
input: "",
time_zone: None,
},
input: "",
}
"###);
insta::assert_debug_snapshot!(p(b"blah"), @r###"
Parsed {
value: ParsedAnnotations {
input: "",
time_zone: None,
},
input: "blah",
}
"###);
}
// Non-critical `key=value` annotations (with one or more `-`-separated
// values) are consumed but produce no time zone.
#[test]
fn ok_unsupported() {
let p = |input| Parser::new().parse(input).unwrap();
insta::assert_debug_snapshot!(
p(b"[u-ca=chinese]"),
@r###"
Parsed {
value: ParsedAnnotations {
input: "[u-ca=chinese]",
time_zone: None,
},
input: "",
}
"###,
);
insta::assert_debug_snapshot!(
p(b"[u-ca=chinese-japanese]"),
@r###"
Parsed {
value: ParsedAnnotations {
input: "[u-ca=chinese-japanese]",
time_zone: None,
},
input: "",
}
"###,
);
insta::assert_debug_snapshot!(
p(b"[u-ca=chinese-japanese-russian]"),
@r###"
Parsed {
value: ParsedAnnotations {
input: "[u-ca=chinese-japanese-russian]",
time_zone: None,
},
input: "",
}
"###,
);
}
// Named time zone annotations are captured verbatim, with or without the
// critical flag. The last case exercises the full set of bytes accepted in
// a name component (`.`, `_`, `+`, `-` and digits).
#[test]
fn ok_iana() {
let p = |input| Parser::new().parse(input).unwrap();
insta::assert_debug_snapshot!(p(b"[America/New_York]"), @r###"
Parsed {
value: ParsedAnnotations {
input: "[America/New_York]",
time_zone: Some(
Named {
critical: false,
name: "America/New_York",
},
),
},
input: "",
}
"###);
insta::assert_debug_snapshot!(p(b"[!America/New_York]"), @r###"
Parsed {
value: ParsedAnnotations {
input: "[!America/New_York]",
time_zone: Some(
Named {
critical: true,
name: "America/New_York",
},
),
},
input: "",
}
"###);
insta::assert_debug_snapshot!(p(b"[UTC]"), @r###"
Parsed {
value: ParsedAnnotations {
input: "[UTC]",
time_zone: Some(
Named {
critical: false,
name: "UTC",
},
),
},
input: "",
}
"###);
insta::assert_debug_snapshot!(p(b"[.._foo_../.0+-]"), @r###"
Parsed {
value: ParsedAnnotations {
input: "[.._foo_../.0+-]",
time_zone: Some(
Named {
critical: false,
name: ".._foo_../.0+-",
},
),
},
input: "",
}
"###);
}
// Numeric offset annotations, including negative zero and a critical offset
// with minutes.
#[test]
fn ok_offset() {
let p = |input| Parser::new().parse(input).unwrap();
insta::assert_debug_snapshot!(p(b"[-00]"), @r###"
Parsed {
value: ParsedAnnotations {
input: "[-00]",
time_zone: Some(
Offset {
critical: false,
offset: ParsedOffset {
kind: Numeric(
-00,
),
},
},
),
},
input: "",
}
"###);
insta::assert_debug_snapshot!(p(b"[+00]"), @r###"
Parsed {
value: ParsedAnnotations {
input: "[+00]",
time_zone: Some(
Offset {
critical: false,
offset: ParsedOffset {
kind: Numeric(
+00,
),
},
},
),
},
input: "",
}
"###);
insta::assert_debug_snapshot!(p(b"[-05]"), @r###"
Parsed {
value: ParsedAnnotations {
input: "[-05]",
time_zone: Some(
Offset {
critical: false,
offset: ParsedOffset {
kind: Numeric(
-05,
),
},
},
),
},
input: "",
}
"###);
insta::assert_debug_snapshot!(p(b"[!+05:12]"), @r###"
Parsed {
value: ParsedAnnotations {
input: "[!+05:12]",
time_zone: Some(
Offset {
critical: true,
offset: ParsedOffset {
kind: Numeric(
+05:12,
),
},
},
),
},
input: "",
}
"###);
}
// A time zone annotation followed by a generic annotation: both are
// consumed, and only the time zone is retained.
#[test]
fn ok_iana_unsupported() {
let p = |input| Parser::new().parse(input).unwrap();
insta::assert_debug_snapshot!(
p(b"[America/New_York][u-ca=chinese-japanese-russian]"),
@r###"
Parsed {
value: ParsedAnnotations {
input: "[America/New_York][u-ca=chinese-japanese-russian]",
time_zone: Some(
Named {
critical: false,
name: "America/New_York",
},
),
},
input: "",
}
"###,
);
}
// A time zone name component may not begin with a digit, whether it is the
// first component or one that follows a `/`.
#[test]
fn err_iana() {
insta::assert_snapshot!(
Parser::new().parse(b"[0/Foo]").unwrap_err(),
@r###"expected ASCII alphabetic byte (or underscore or period) at the start of an RFC 9557 annotation or time zone component name, but found "0" instead"###,
);
insta::assert_snapshot!(
Parser::new().parse(b"[Foo/0Bar]").unwrap_err(),
@r###"expected ASCII alphabetic byte (or underscore or period) at the start of an RFC 9557 annotation or time zone component name, but found "0" instead"###,
);
}
// Offset annotations that are truncated, have hours out of range, or carry
// sub-minute precision are rejected.
#[test]
fn err_offset() {
insta::assert_snapshot!(
Parser::new().parse(b"[+").unwrap_err(),
@r###"failed to parse hours in UTC numeric offset "+": expected two digit hour after sign, but found end of input"###,
);
insta::assert_snapshot!(
Parser::new().parse(b"[+26]").unwrap_err(),
@r###"failed to parse hours in UTC numeric offset "+26]": offset hours are not valid: parameter 'hours' with value 26 is not in the required range of 0..=25"###,
);
insta::assert_snapshot!(
Parser::new().parse(b"[-26]").unwrap_err(),
@r###"failed to parse hours in UTC numeric offset "-26]": offset hours are not valid: parameter 'hours' with value 26 is not in the required range of 0..=25"###,
);
insta::assert_snapshot!(
Parser::new().parse(b"[+05:12:34]").unwrap_err(),
@r###"subminute precision for UTC numeric offset "+05:12:34]" is not enabled in this context (must provide only integral minutes)"###,
);
insta::assert_snapshot!(
Parser::new().parse(b"[+05:12:34.123456789]").unwrap_err(),
@r###"subminute precision for UTC numeric offset "+05:12:34.123456789]" is not enabled in this context (must provide only integral minutes)"###,
);
}
// A generic annotation with the critical flag set is an error, even though
// it is otherwise well-formed.
#[test]
fn err_critical_unsupported() {
insta::assert_snapshot!(
Parser::new().parse(b"[!u-ca=chinese]").unwrap_err(),
@r###"found unsupported RFC 9557 annotation with key "u-ca" with the critical flag ('!') set"###,
);
}
// Errors for a missing or invalid first byte after `[`. In the first
// annotation the message mentions time zone names; in later annotations it
// mentions annotation keys.
#[test]
fn err_key_leading_char() {
insta::assert_snapshot!(
Parser::new().parse(b"[").unwrap_err(),
@"expected the start of an RFC 9557 annotation or IANA time zone component name, but found end of input instead",
);
insta::assert_snapshot!(
Parser::new().parse(b"[&").unwrap_err(),
@r###"expected ASCII alphabetic byte (or underscore or period) at the start of an RFC 9557 annotation or time zone component name, but found "&" instead"###,
);
insta::assert_snapshot!(
Parser::new().parse(b"[Foo][").unwrap_err(),
@"expected the start of an RFC 9557 annotation key, but found end of input instead",
);
insta::assert_snapshot!(
Parser::new().parse(b"[Foo][&").unwrap_err(),
@r###"expected lowercase alphabetic byte (or underscore) at the start of an RFC 9557 annotation key, but found "&" instead"###,
);
}
// Errors for what follows a name or key. In the first annotation a missing
// `=` is reported as a missing `]` of a time zone annotation; in later
// annotations it is reported as a missing `=`.
#[test]
fn err_separator() {
insta::assert_snapshot!(
Parser::new().parse(b"[abc").unwrap_err(),
@"expected an ']' after parsing an RFC 9557 time zone annotation, but found end of input instead",
);
insta::assert_snapshot!(
Parser::new().parse(b"[_abc").unwrap_err(),
@"expected an ']' after parsing an RFC 9557 time zone annotation, but found end of input instead",
);
insta::assert_snapshot!(
Parser::new().parse(b"[abc^").unwrap_err(),
@r###"expected an ']' after parsing an RFC 9557 time zone annotation, but found "^" instead"###,
);
insta::assert_snapshot!(
Parser::new().parse(b"[Foo][abc").unwrap_err(),
@"expected an '=' after parsing an RFC 9557 annotation key, but found end of input instead",
);
insta::assert_snapshot!(
Parser::new().parse(b"[Foo][_abc").unwrap_err(),
@"expected an '=' after parsing an RFC 9557 annotation key, but found end of input instead",
);
insta::assert_snapshot!(
Parser::new().parse(b"[Foo][abc^").unwrap_err(),
@r###"expected an '=' after parsing an RFC 9557 annotation key, but found "^" instead"###,
);
}
// An annotation value must be present and must start with an ASCII
// alphanumeric byte.
#[test]
fn err_value() {
insta::assert_snapshot!(
Parser::new().parse(b"[abc=").unwrap_err(),
@"expected the start of an RFC 9557 annotation value, but found end of input instead",
);
insta::assert_snapshot!(
Parser::new().parse(b"[_abc=").unwrap_err(),
@"expected the start of an RFC 9557 annotation value, but found end of input instead",
);
insta::assert_snapshot!(
Parser::new().parse(b"[abc=^").unwrap_err(),
@r###"expected alphanumeric ASCII byte at the start of an RFC 9557 annotation value, but found "^" instead"###,
);
insta::assert_snapshot!(
Parser::new().parse(b"[abc=]").unwrap_err(),
@r###"expected alphanumeric ASCII byte at the start of an RFC 9557 annotation value, but found "]" instead"###,
);
}
// A `key=value` annotation must be terminated by `]`.
#[test]
fn err_close() {
insta::assert_snapshot!(
Parser::new().parse(b"[abc=123").unwrap_err(),
@"expected an ']' after parsing an RFC 9557 annotation key and value, but found end of input instead",
);
insta::assert_snapshot!(
Parser::new().parse(b"[abc=123*").unwrap_err(),
@r###"expected an ']' after parsing an RFC 9557 annotation key and value, but found "*" instead"###,
);
}
// A syntactically valid name that is not in the time zone database parses
// fine but fails at resolution time, even without the critical flag.
#[cfg(feature = "std")]
#[test]
fn err_time_zone() {
let p = |input| {
Parser::new()
.parse(input)
.unwrap()
.value
.to_time_zone(crate::tz::db())
.unwrap_err()
};
insta::assert_snapshot!(
p(b"[Foo]"),
@"failed to find timezone 'Foo' in time zone database",
);
}
// A second time zone annotation is parsed as a generic annotation, so the
// `/` in its name triggers the dedicated "must come first" error.
#[test]
fn err_repeated_time_zone() {
let p = |input| Parser::new().parse(input).unwrap_err();
insta::assert_snapshot!(
p(b"[america/new_york][america/new_york]"),
@"expected an '=' after parsing an RFC 9557 annotation key, but found / instead (time zone annotations must come first)",
);
}
}