icu_datetime/provider/skeleton/reference.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Reference `Skeleton` implementation for parsing.
6
7use super::error::SkeletonError;
8use crate::provider::fields::{self, Field, FieldLength, FieldSymbol};
9#[cfg(feature = "datagen")]
10use crate::provider::pattern::reference::Pattern;
11use alloc::vec::Vec;
12use core::convert::TryFrom;
13use smallvec::SmallVec;
14
/// A [`Skeleton`] is used to represent what types of `Field`s are present in a [`Pattern`]. The
/// ordering of the [`Skeleton`]'s `Field`s has no bearing on the ordering of the `Field`s and
/// `Literal`s in the [`Pattern`].
///
/// A [`Skeleton`] wraps a `SmallVec<[Field; 5]>`, with the intended invariant that it is sorted
/// according to the canonical sort order. This order runs from the most significant `Field` to
/// the least significant. For example, a field with a `Minute` symbol would precede a field with
/// a `Second` symbol. This order is documented as the order of fields as presented in the
/// [UTS 35 Date Field Symbol Table](https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table)
///
/// The `Field`s are only sorted in the [`Skeleton`] in order to provide a deterministic
/// serialization strategy, and to provide a faster [`Skeleton`] matching operation.
///
/// Note that the plain `From` conversions from `SmallVec`, `Vec` and slices in this file wrap
/// their input as-is; they do not sort or deduplicate it.
#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd)]
// TODO(#876): Use ZeroVec instead of SmallVec
pub struct Skeleton(pub(crate) SmallVec<[fields::Field; 5]>);
30
31impl Skeleton {
32 pub(crate) fn fields_iter(&self) -> impl Iterator<Item = &Field> {
33 self.0.iter()
34 }
35
36 pub(crate) fn fields_len(&self) -> usize {
37 self.0.len()
38 }
39
40 /// Return the underlying fields as a slice.
41 pub fn as_slice(&self) -> &[fields::Field] {
42 self.0.as_slice()
43 }
44}
45
46impl From<SmallVec<[fields::Field; 5]>> for Skeleton {
47 fn from(fields: SmallVec<[fields::Field; 5]>) -> Self {
48 Self(fields)
49 }
50}
51
52impl From<Vec<fields::Field>> for Skeleton {
53 fn from(fields: Vec<fields::Field>) -> Self {
54 Self(fields.into())
55 }
56}
57
58impl From<&[fields::Field]> for Skeleton {
59 fn from(fields: &[fields::Field]) -> Self {
60 Self(fields.into())
61 }
62}
63
/// Derive a Skeleton from a Pattern. Literal items are dropped, each field's symbol is mapped
/// to the variety that skeletons use, and the resulting fields are kept in sorted order with
/// duplicates discarded. For instance, in the "ja" locale the pattern "aK:mm" gets transformed
/// into the skeleton "hmm".
///
/// At the time of this writing, it's being used for applying hour cycle preferences and should not
/// be exposed as a public API for end users.
#[doc(hidden)]
#[cfg(feature = "datagen")]
impl From<&Pattern> for Skeleton {
    fn from(pattern: &Pattern) -> Self {
        let mut sorted: SmallVec<[fields::Field; 5]> = SmallVec::new();

        for item in pattern.items() {
            // Anything that is not a field (e.g. a literal) does not take part in a skeleton.
            let mut candidate = match item {
                crate::provider::pattern::PatternItem::Field(found) => *found,
                _ => continue,
            };

            // Skeletons use a reduced set of symbols; the patterns they expand to carry the
            // more specific ones.
            let skeleton_symbol = match candidate.symbol {
                // Months and weekdays collapse to their format variety.
                FieldSymbol::Month(_) => FieldSymbol::Month(fields::Month::Format),
                FieldSymbol::Weekday(_) => FieldSymbol::Weekday(fields::Weekday::Format),

                // These day periods never appear in a skeleton, so the field is skipped.
                FieldSymbol::DayPeriod(fields::DayPeriod::AmPm)
                | FieldSymbol::DayPeriod(fields::DayPeriod::NoonMidnight) => continue,
                // TODO(#487) - Flexible day periods should be included here.
                // FieldSymbol::DayPeriod(fields::DayPeriod::Flexible) => {
                //     FieldSymbol::DayPeriod(fields::DayPeriod::Flexible)
                // }

                // Both 12-hour style symbols are represented by H12 in a skeleton.
                FieldSymbol::Hour(fields::Hour::H11) | FieldSymbol::Hour(fields::Hour::H12) => {
                    FieldSymbol::Hour(fields::Hour::H12)
                }
                FieldSymbol::Hour(fields::Hour::H23) => FieldSymbol::Hour(fields::Hour::H23),

                // Every remaining symbol is kept exactly as it appears in the pattern.
                FieldSymbol::Minute
                | FieldSymbol::Second(_)
                | FieldSymbol::TimeZone(_)
                | FieldSymbol::DecimalSecond(_)
                | FieldSymbol::Era
                | FieldSymbol::Year(_)
                | FieldSymbol::Week(_)
                | FieldSymbol::Day(_) => candidate.symbol,
            };
            candidate.symbol = skeleton_symbol;

            // Insert at the sorted position; a field that is already present is ignored.
            match sorted.binary_search(&candidate) {
                Ok(_) => {}
                Err(slot) => sorted.insert(slot, candidate),
            }
        }

        Skeleton(sorted)
    }
}
123
124/// Parse a string into a list of fields. This trait implementation validates the input string to
125/// verify that fields are correct. If the fields are out of order, this returns an error that
126/// contains the fields, which gives the callee a chance to sort the fields with the
127/// `From<SmallVec<[fields::Field; 5]>> for Skeleton` trait.
128impl TryFrom<&str> for Skeleton {
129 type Error = SkeletonError;
130 fn try_from(skeleton_string: &str) -> Result<Self, Self::Error> {
131 let mut fields: SmallVec<[fields::Field; 5]> = SmallVec::new();
132
133 let mut iter = skeleton_string.chars().peekable();
134 while let Some(ch) = iter.next() {
135 // Go through the chars to count how often it's repeated.
136 let mut field_length: u8 = 1;
137 while let Some(next_ch) = iter.peek() {
138 if *next_ch != ch {
139 break;
140 }
141 field_length += 1;
142 iter.next();
143 }
144
145 // Convert the byte to a valid field symbol.
146 let field_symbol = if ch == 'Z' {
147 match field_length {
148 1..=3 => {
149 field_length = 4;
150 FieldSymbol::try_from('x')?
151 }
152 4 => FieldSymbol::try_from('O')?,
153 5 => {
154 field_length = 4;
155 FieldSymbol::try_from('X')?
156 }
157 _ => FieldSymbol::try_from(ch)?,
158 }
159 } else {
160 FieldSymbol::try_from(ch)?
161 };
162 let field = Field::from((field_symbol, FieldLength::from_idx(field_length)?));
163
164 match fields.binary_search(&field) {
165 Ok(_) => return Err(SkeletonError::DuplicateField),
166 Err(pos) => fields.insert(pos, field),
167 }
168 }
169
170 Ok(Self::from(fields))
171 }
172}
173
/// Writes the skeleton as a string: for each field in stored order, the field's symbol
/// character is emitted `field.length.to_len()` times.
#[cfg(feature = "datagen")]
impl core::fmt::Display for Skeleton {
    fn fmt(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        use core::fmt::Write;
        self.fields_iter().try_for_each(|field| {
            let symbol: char = field.symbol.into();
            // Stop at the first write error, exactly like `?` inside a loop would.
            (0..field.length.to_len()).try_for_each(|_| formatter.write_char(symbol))
        })
    }
}