icu_datetime/provider/skeleton/
reference.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Reference `Skeleton` implementation for parsing.
6
7use super::error::SkeletonError;
8use crate::provider::fields::{self, Field, FieldLength, FieldSymbol};
9#[cfg(feature = "datagen")]
10use crate::provider::pattern::reference::Pattern;
11use alloc::vec::Vec;
12use core::convert::TryFrom;
13use smallvec::SmallVec;
14
/// A [`Skeleton`] is used to represent what types of `Field`s are present in a [`Pattern`]. The
/// ordering of the [`Skeleton`]'s `Field`s has no bearing on the ordering of the `Field`s and
/// `Literal`s in the [`Pattern`].
///
/// A [`Skeleton`] is a list of `Field`s (stored in a `SmallVec`), with the invariant that it is
/// sorted according to the canonical sort order. This order runs from the most significant
/// `Field` to the least significant. For example, a field with a `Minute` symbol would precede
/// a field with a `Second` symbol. This order is documented as the order of fields as presented
/// in the
/// [UTS 35 Date Field Symbol Table](https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table).
///
/// The `Field`s are only sorted in the [`Skeleton`] in order to provide a deterministic
/// serialization strategy, and to provide a faster [`Skeleton`] matching operation.
#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd)]
// TODO(#876): Use ZeroVec instead of SmallVec
pub struct Skeleton(pub(crate) SmallVec<[fields::Field; 5]>);
30
31impl Skeleton {
32    pub(crate) fn fields_iter(&self) -> impl Iterator<Item = &Field> {
33        self.0.iter()
34    }
35
36    pub(crate) fn fields_len(&self) -> usize {
37        self.0.len()
38    }
39
40    /// Return the underlying fields as a slice.
41    pub fn as_slice(&self) -> &[fields::Field] {
42        self.0.as_slice()
43    }
44}
45
46impl From<SmallVec<[fields::Field; 5]>> for Skeleton {
47    fn from(fields: SmallVec<[fields::Field; 5]>) -> Self {
48        Self(fields)
49    }
50}
51
52impl From<Vec<fields::Field>> for Skeleton {
53    fn from(fields: Vec<fields::Field>) -> Self {
54        Self(fields.into())
55    }
56}
57
58impl From<&[fields::Field]> for Skeleton {
59    fn from(fields: &[fields::Field]) -> Self {
60        Self(fields.into())
61    }
62}
63
/// Convert a Pattern into a Skeleton. Literals are dropped, the remaining fields are mapped to
/// the subset of symbols that skeletons use, and the result is kept in sorted order with
/// duplicate fields removed. For instance, in the "ja" locale the pattern "aK:mm" gets
/// transformed into the skeleton "hmm".
///
/// At the time of this writing, it's being used for applying hour cycle preferences and should not
/// be exposed as a public API for end users.
#[doc(hidden)]
#[cfg(feature = "datagen")]
impl From<&Pattern> for Skeleton {
    fn from(pattern: &Pattern) -> Self {
        /// Map a pattern symbol to the symbol used in skeletons, or `None` when the field
        /// should be left out of the skeleton entirely.
        fn skeleton_symbol(symbol: FieldSymbol) -> Option<FieldSymbol> {
            let mapped = match symbol {
                // Skeletons only carry the format variety; the matched patterns are the ones
                // that get more specific.
                FieldSymbol::Month(_) => FieldSymbol::Month(fields::Month::Format),
                FieldSymbol::Weekday(_) => FieldSymbol::Weekday(fields::Weekday::Format),

                // These day periods are not represented in skeletons, so skip the field.
                // TODO(#487) - Flexible day periods should be included here.
                FieldSymbol::DayPeriod(fields::DayPeriod::AmPm)
                | FieldSymbol::DayPeriod(fields::DayPeriod::NoonMidnight) => return None,

                // H11 and H12 both collapse to H12; H23 is kept as it is.
                FieldSymbol::Hour(fields::Hour::H11) | FieldSymbol::Hour(fields::Hour::H12) => {
                    FieldSymbol::Hour(fields::Hour::H12)
                }
                FieldSymbol::Hour(fields::Hour::H23) => FieldSymbol::Hour(fields::Hour::H23),

                // Everything else is passed through unchanged.
                FieldSymbol::Minute
                | FieldSymbol::Second(_)
                | FieldSymbol::TimeZone(_)
                | FieldSymbol::DecimalSecond(_)
                | FieldSymbol::Era
                | FieldSymbol::Year(_)
                | FieldSymbol::Week(_)
                | FieldSymbol::Day(_) => symbol,
            };
            Some(mapped)
        }

        let mut sorted_fields: SmallVec<[fields::Field; 5]> = SmallVec::new();
        for item in pattern.items() {
            // Literals carry no field information; only fields contribute to the skeleton.
            let mut field = match item {
                crate::provider::pattern::PatternItem::Field(field) => *field,
                _ => continue,
            };

            field.symbol = match skeleton_symbol(field.symbol) {
                Some(symbol) => symbol,
                None => continue,
            };

            // Insert at the sorted position, and only when the field is not already present.
            match sorted_fields.binary_search(&field) {
                Ok(_) => {}
                Err(pos) => sorted_fields.insert(pos, field),
            }
        }
        Self(sorted_fields)
    }
}
123
124/// Parse a string into a list of fields. This trait implementation validates the input string to
125/// verify that fields are correct. If the fields are out of order, this returns an error that
126/// contains the fields, which gives the callee a chance to sort the fields with the
127/// `From<SmallVec<[fields::Field; 5]>> for Skeleton` trait.
128impl TryFrom<&str> for Skeleton {
129    type Error = SkeletonError;
130    fn try_from(skeleton_string: &str) -> Result<Self, Self::Error> {
131        let mut fields: SmallVec<[fields::Field; 5]> = SmallVec::new();
132
133        let mut iter = skeleton_string.chars().peekable();
134        while let Some(ch) = iter.next() {
135            // Go through the chars to count how often it's repeated.
136            let mut field_length: u8 = 1;
137            while let Some(next_ch) = iter.peek() {
138                if *next_ch != ch {
139                    break;
140                }
141                field_length += 1;
142                iter.next();
143            }
144
145            // Convert the byte to a valid field symbol.
146            let field_symbol = if ch == 'Z' {
147                match field_length {
148                    1..=3 => {
149                        field_length = 4;
150                        FieldSymbol::try_from('x')?
151                    }
152                    4 => FieldSymbol::try_from('O')?,
153                    5 => {
154                        field_length = 4;
155                        FieldSymbol::try_from('X')?
156                    }
157                    _ => FieldSymbol::try_from(ch)?,
158                }
159            } else {
160                FieldSymbol::try_from(ch)?
161            };
162            let field = Field::from((field_symbol, FieldLength::from_idx(field_length)?));
163
164            match fields.binary_search(&field) {
165                Ok(_) => return Err(SkeletonError::DuplicateField),
166                Err(pos) => fields.insert(pos, field),
167            }
168        }
169
170        Ok(Self::from(fields))
171    }
172}
173
/// Write the skeleton as a string: each field's symbol character is repeated `to_len()` times,
/// and the fields are written in stored order.
#[cfg(feature = "datagen")]
impl core::fmt::Display for Skeleton {
    fn fmt(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        use core::fmt::Write;
        self.0.iter().try_for_each(|field| {
            let symbol: char = field.symbol.into();
            // Stop at the first write error, exactly like `?` inside a loop would.
            (0..field.length.to_len()).try_for_each(|_| formatter.write_char(symbol))
        })
    }
}