icu_messageformat_parser/
ast.rs

1use serde::ser::{SerializeMap, SerializeStruct};
2use serde::{Serialize, Serializer};
3use serde_repr::Serialize_repr;
4use std::fmt;
5#[cfg(feature = "utf16")]
6use widestring::Utf16Str;
7
8use crate::intl::date_time_format_options::JsIntlDateTimeFormatOptions;
9use crate::intl::number_format_options::JsIntlNumberFormatOptions;
10
/// The type of an error that occurred while building an AST.
///
/// The enum is `#[repr(u8)]` and derives `Serialize_repr`, so each variant
/// carries the explicit numeric code listed below.
///
/// NOTE(review): no variant uses discriminant 24 — presumably the codes are
/// kept in sync with an external list; confirm before renumbering.
#[derive(Clone, Debug, Eq, PartialEq, Serialize_repr)]
#[repr(u8)]
pub enum ErrorKind {
    /// Argument is unclosed (e.g. `{0`)
    ExpectArgumentClosingBrace = 1,
    /// Argument is empty (e.g. `{}`).
    EmptyArgument = 2,
    /// Argument is malformed (e.g. `{foo!}`)
    MalformedArgument = 3,
    /// Expect an argument type (e.g. `{foo,}`)
    ExpectArgumentType = 4,
    /// Unsupported argument type (e.g. `{foo,foo}`)
    InvalidArgumentType = 5,
    /// Expect an argument style (e.g. `{foo, number, }`)
    ExpectArgumentStyle = 6,
    /// The number skeleton is invalid.
    InvalidNumberSkeleton = 7,
    /// The date time skeleton is invalid.
    InvalidDateTimeSkeleton = 8,
    /// Expect a number skeleton following the `::` (e.g. `{foo, number, ::}`)
    ExpectNumberSkeleton = 9,
    /// Expect a date time skeleton following the `::` (e.g. `{foo, date, ::}`)
    ExpectDateTimeSkeleton = 10,
    /// Unmatched apostrophes in the argument style (e.g. `{foo, number, 'test`)
    UnclosedQuoteInArgumentStyle = 11,
    /// Missing select argument options (e.g. `{foo, select}`)
    ExpectSelectArgumentOptions = 12,

    /// Expecting an offset value in `plural` or `selectordinal` argument (e.g. `{foo, plural, offset}`)
    ExpectPluralArgumentOffsetValue = 13,
    /// Offset value in `plural` or `selectordinal` is invalid (e.g. `{foo, plural, offset: x}`)
    InvalidPluralArgumentOffsetValue = 14,

    /// Expecting a selector in `select` argument (e.g. `{foo, select}`)
    ExpectSelectArgumentSelector = 15,
    /// Expecting a selector in `plural` or `selectordinal` argument (e.g. `{foo, plural}`)
    ExpectPluralArgumentSelector = 16,

    /// Expecting a message fragment after the `select` selector (e.g. `{foo, select, apple}`)
    ExpectSelectArgumentSelectorFragment = 17,
    /// Expecting a message fragment after the `plural` or `selectordinal` selector
    /// (e.g. `{foo, plural, one}`)
    ExpectPluralArgumentSelectorFragment = 18,

    /// Selector in `plural` or `selectordinal` is malformed (e.g. `{foo, plural, =x {#}}`)
    InvalidPluralArgumentSelector = 19,

    /// Duplicate selectors in `plural` or `selectordinal` argument.
    /// (e.g. `{foo, plural, one {#} one {#}}`)
    DuplicatePluralArgumentSelector = 20,
    /// Duplicate selectors in `select` argument.
    /// (e.g. `{foo, select, apple {apple} apple {apple}}`)
    DuplicateSelectArgumentSelector = 21,

    /// Plural or select argument option must have `other` clause.
    MissingOtherClause = 22,

    /// The tag is malformed. (e.g. `<bold!>foo</bold!>`)
    InvalidTag = 23,
    /// The tag name is invalid. (e.g. `<123>foo</123>`)
    InvalidTagName = 25,
    /// The closing tag does not match the opening tag. (e.g. `<bold>foo</italic>`)
    UnmatchedClosingTag = 26,
    /// The opening tag has no matching closing tag. (e.g. `<bold>foo`)
    UnclosedTag = 27,
}
78
79impl fmt::Display for ErrorKind {
80    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
81        match self {
82            ErrorKind::ExpectArgumentClosingBrace => write!(f, "EXPECT_ARGUMENT_CLOSING_BRACE"),
83            ErrorKind::EmptyArgument => write!(f, "EMPTY_ARGUMENT"),
84            ErrorKind::MalformedArgument => write!(f, "MALFORMED_ARGUMENT"),
85            ErrorKind::ExpectArgumentType => write!(f, "EXPECT_ARGUMENT_TYPE"),
86            ErrorKind::InvalidArgumentType => write!(f, "INVALID_ARGUMENT_TYPE"),
87            ErrorKind::ExpectArgumentStyle => write!(f, "EXPECT_ARGUMENT_STYLE"),
88            ErrorKind::InvalidNumberSkeleton => write!(f, "INVALID_NUMBER_SKELETON"),
89            ErrorKind::InvalidDateTimeSkeleton => write!(f, "INVALID_DATE_TIME_SKELETON"),
90            ErrorKind::ExpectNumberSkeleton => write!(f, "EXPECT_NUMBER_SKELETON"),
91            ErrorKind::ExpectDateTimeSkeleton => write!(f, "EXPECT_DATE_TIME_SKELETON"),
92            ErrorKind::UnclosedQuoteInArgumentStyle => {
93                write!(f, "UNCLOSED_QUOTE_IN_ARGUMENT_STYLE")
94            }
95            ErrorKind::ExpectSelectArgumentOptions => write!(f, "EXPECT_SELECT_ARGUMENT_OPTIONS"),
96            ErrorKind::ExpectPluralArgumentOffsetValue => {
97                write!(f, "EXPECT_PLURAL_ARGUMENT_OFFSET_VALUE")
98            }
99            ErrorKind::InvalidPluralArgumentOffsetValue => {
100                write!(f, "INVALID_PLURAL_ARGUMENT_OFFSET_VALUE")
101            }
102            ErrorKind::ExpectSelectArgumentSelector => write!(f, "EXPECT_SELECT_ARGUMENT_SELECTOR"),
103            ErrorKind::ExpectPluralArgumentSelector => write!(f, "EXPECT_PLURAL_ARGUMENT_SELECTOR"),
104            ErrorKind::ExpectSelectArgumentSelectorFragment => {
105                write!(f, "EXPECT_SELECT_ARGUMENT_SELECTOR_FRAGMENT")
106            }
107            ErrorKind::ExpectPluralArgumentSelectorFragment => {
108                write!(f, "EXPECT_PLURAL_ARGUMENT_SELECTOR_FRAGMENT")
109            }
110            ErrorKind::InvalidPluralArgumentSelector => {
111                write!(f, "INVALID_PLURAL_ARGUMENT_SELECTOR")
112            }
113            ErrorKind::DuplicatePluralArgumentSelector => {
114                write!(f, "DUPLICATE_PLURAL_ARGUMENT_SELECTOR")
115            }
116            ErrorKind::DuplicateSelectArgumentSelector => {
117                write!(f, "DUPLICATE_SELECT_ARGUMENT_SELECTOR")
118            }
119            ErrorKind::MissingOtherClause => write!(f, "MISSING_OTHER_CLAUSE"),
120            ErrorKind::InvalidTag => write!(f, "INVALID_TAG"),
121            ErrorKind::InvalidTagName => write!(f, "INVALID_TAG_NAME"),
122            ErrorKind::UnmatchedClosingTag => write!(f, "UNMATCHED_CLOSING_TAG"),
123            ErrorKind::UnclosedTag => write!(f, "UNCLOSED_TAG"),
124        }
125    }
126}
127
/// A single position in an ICU message.
///
/// A position encodes one half of a span, and includes the code unit offset,
/// line number and column number.
///
/// NOTE(review): the test fixture in this file uses `Position::new(0, 1, 1)`
/// for the start of a message, which suggests a 0-based offset and 1-based
/// line/column — confirm against the parser.
#[derive(Clone, Copy, Eq, PartialEq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Position {
    /// Code unit offset of this position.
    pub offset: usize,
    /// Line number of this position.
    pub line: usize,
    /// Column number of this position.
    pub column: usize,
}
139
140impl Position {
141    pub fn new(offset: usize, line: usize, column: usize) -> Position {
142        Position {
143            offset,
144            line,
145            column,
146        }
147    }
148}
149
150impl fmt::Debug for Position {
151    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
152        write!(
153            f,
154            "Position::new({:?}, {:?}, {:?})",
155            self.offset, self.line, self.column
156        )
157    }
158}
159
/// Span represents the position information of a single AST item.
///
/// Both ends are [`Position`]s into the original ICU message that was parsed.
#[derive(Clone, Copy, Eq, PartialEq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Span {
    /// The start position.
    pub start: Position,
    /// The end position.
    pub end: Position,
}
172
173impl fmt::Debug for Span {
174    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
175        write!(f, "Span::new({:?}, {:?})", self.start, self.end)
176    }
177}
178
179impl Span {
180    /// Create a new span with the given positions.
181    pub fn new(start: Position, end: Position) -> Span {
182        Span { start, end }
183    }
184}
185
/// An error that occurred while parsing an ICU message into an abstract
/// syntax tree.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct Error {
    /// The kind of error.
    pub kind: ErrorKind,
    /// The original message that the parser generated the error from. Every
    /// span in an error is a valid range into this string.
    pub message: String,
    /// The span of this error. Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub location: Option<Span>,
}
199
/// An abstract syntax tree for an ICU message: the ordered list of its
/// elements. Adapted from:
/// https://github.com/formatjs/formatjs/blob/c03d4989323a33765798acdd74fb4f5b01f0bdcd/packages/intl-messageformat-parser/src/types.ts
pub type Ast<'s> = Vec<AstElement<'s>>;
203
/// The plural category family used by an [`AstElement::Plural`] element.
///
/// Serialized as the camelCase variant name (`"cardinal"` / `"ordinal"`).
///
/// NOTE(review): presumably `Cardinal` corresponds to `plural` arguments and
/// `Ordinal` to `selectordinal` arguments — confirm against the parser.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum PluralType {
    Cardinal,
    Ordinal,
}
210
/// A single element of a parsed ICU message (see [`Ast`]).
///
/// Where present, `span` is the optional source location of the element; the
/// hand-written `Serialize` impl in this file emits it under the key
/// `location`.
#[derive(Clone, Debug, PartialEq)]
pub enum AstElement<'s> {
    /// Raw text
    Literal {
        value: String,
        span: Option<Span>,
    },
    /// Variable w/o any format, e.g. `var` in `this is a {var}`
    Argument { value: String, span: Option<Span> },
    /// Variable w/ number format
    Number {
        value: String,
        span: Option<Span>,
        style: Option<NumberArgStyle<'s>>,
    },
    /// Variable w/ date format
    Date {
        value: String,
        span: Option<Span>,
        style: Option<DateTimeArgStyle<'s>>,
    },
    /// Variable w/ time format
    Time {
        value: String,
        span: Option<Span>,
        style: Option<DateTimeArgStyle<'s>>,
    },
    /// Variable w/ select format
    Select {
        value: String,
        span: Option<Span>,
        options: PluralOrSelectOptions<'s>,
    },
    /// Variable w/ plural format
    Plural {
        value: String,
        plural_type: PluralType,
        span: Option<Span>,
        // TODO: want to use double here but it does not implement Eq trait.
        offset: i64,
        options: PluralOrSelectOptions<'s>,
    },
    /// Only possible within plural argument.
    /// This is the `#` symbol that will be substituted with the count.
    Pound(Span),
    /// XML-like tag. `children` holds the elements nested inside the tag;
    /// `value` is presumably the tag name (TODO confirm against the parser).
    Tag {
        value: &'s str,
        span: Option<Span>,
        children: Box<Ast<'s>>,
    },
}
263
264// Until this is resolved, we have to roll our own serialization: https://github.com/serde-rs/serde/issues/745
265impl<'s> Serialize for AstElement<'s> {
266    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
267    where
268        S: Serializer,
269    {
270        match *self {
271            AstElement::Literal {
272                ref value,
273                ref span,
274            } => {
275                let mut state = serializer.serialize_struct("Literal", 3)?;
276                state.serialize_field("type", &0)?;
277                state.serialize_field("value", value)?;
278                if span.is_some() {
279                    state.serialize_field("location", span)?;
280                }
281                state.end()
282            }
283            AstElement::Argument {
284                ref value,
285                ref span,
286            } => {
287                let mut state = serializer.serialize_struct("Argument", 3)?;
288                state.serialize_field("type", &1)?;
289                state.serialize_field("value", value)?;
290                if span.is_some() {
291                    state.serialize_field("location", span)?;
292                }
293                state.end()
294            }
295            AstElement::Number {
296                ref value,
297                ref span,
298                ref style,
299            } => {
300                let mut state = serializer.serialize_struct("Number", 4)?;
301                state.serialize_field("type", &2)?;
302                state.serialize_field("value", value)?;
303                if span.is_some() {
304                    state.serialize_field("location", span)?;
305                }
306                if style.is_some() {
307                    state.serialize_field("style", style)?;
308                }
309                state.end()
310            }
311            AstElement::Date {
312                ref value,
313                ref span,
314                ref style,
315            } => {
316                let mut state = serializer.serialize_struct("Date", 4)?;
317                state.serialize_field("type", &3)?;
318                state.serialize_field("value", value)?;
319                if span.is_some() {
320                    state.serialize_field("location", span)?;
321                }
322                if style.is_some() {
323                    state.serialize_field("style", style)?;
324                }
325                state.end()
326            }
327            AstElement::Time {
328                ref value,
329                ref span,
330                ref style,
331            } => {
332                let mut state = serializer.serialize_struct("Time", 4)?;
333                state.serialize_field("type", &4)?;
334                state.serialize_field("value", value)?;
335                if span.is_some() {
336                    state.serialize_field("location", span)?;
337                }
338                if style.is_some() {
339                    state.serialize_field("style", style)?;
340                }
341                state.end()
342            }
343            AstElement::Select {
344                ref value,
345                ref span,
346                ref options,
347            } => {
348                let mut state = serializer.serialize_struct("Select", 4)?;
349                state.serialize_field("type", &5)?;
350                state.serialize_field("value", value)?;
351                state.serialize_field("options", options)?;
352                if span.is_some() {
353                    state.serialize_field("location", span)?;
354                }
355                state.end()
356            }
357            AstElement::Plural {
358                ref value,
359                ref span,
360                ref plural_type,
361                ref offset,
362                ref options,
363            } => {
364                let mut state = serializer.serialize_struct("Plural", 6)?;
365                state.serialize_field("type", &6)?;
366                state.serialize_field("value", value)?;
367                state.serialize_field("options", options)?;
368                state.serialize_field("offset", offset)?;
369                state.serialize_field("pluralType", plural_type)?;
370                if span.is_some() {
371                    state.serialize_field("location", span)?;
372                }
373                state.end()
374            }
375            AstElement::Pound(ref span) => {
376                let mut state = serializer.serialize_struct("Pound", 2)?;
377                state.serialize_field("type", &7)?;
378                state.serialize_field("location", span)?;
379                state.end()
380            }
381            AstElement::Tag {
382                ref value,
383                ref span,
384                ref children,
385            } => {
386                let mut state = serializer.serialize_struct("Pound", 2)?;
387                state.serialize_field("type", &8)?;
388                state.serialize_field("value", value)?;
389                state.serialize_field("children", children)?;
390                if span.is_some() {
391                    state.serialize_field("location", span)?;
392                }
393                state.end()
394            }
395        }
396    }
397}
398
/// Ordered list of `(selector, option)` pairs of a `select` / `plural`
/// element. With the `utf16` feature enabled the selectors are borrowed
/// `Utf16Str` slices.
#[cfg(feature = "utf16")]
#[derive(Clone, Debug, PartialEq)]
pub struct PluralOrSelectOptions<'s>(pub Vec<(&'s Utf16Str, PluralOrSelectOption<'s>)>);

/// Workaround of Rust's orphan impl rule
///
/// Ordered list of `(selector, option)` pairs of a `select` / `plural`
/// element. Without the `utf16` feature the selectors are borrowed `str`
/// slices.
#[cfg(not(feature = "utf16"))]
#[derive(Clone, Debug, PartialEq)]
pub struct PluralOrSelectOptions<'s>(pub Vec<(&'s str, PluralOrSelectOption<'s>)>);
407
408impl<'s> Serialize for PluralOrSelectOptions<'s> {
409    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
410    where
411        S: Serializer,
412    {
413        let options = &self.0;
414        let mut state = serializer.serialize_map(Some(options.len()))?;
415        for (selector, fragment) in options {
416            #[cfg(feature = "utf16")]
417            let s = selector.to_string();
418            #[cfg(feature = "utf16")]
419            let s = s.as_str();
420            #[cfg(not(feature = "utf16"))]
421            let s = selector;
422            state.serialize_entry(s, fragment)?;
423        }
424        state.end()
425    }
426}
427
/// Style of a number argument: either a plain style string or a parsed
/// number skeleton.
///
/// The enum is `#[serde(untagged)]`, so `Style` serializes as a bare string
/// and `Skeleton` as the skeleton object itself.
#[derive(Clone, Debug, PartialEq, Serialize)]
#[serde(untagged)]
pub enum NumberArgStyle<'s> {
    Style(&'s str),
    Skeleton(NumberSkeleton<'s>),
}
434
/// A number skeleton: its tokens plus the number format options held in
/// `parsed_options`. Field names are serialized in camelCase.
#[derive(Clone, Debug, PartialEq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct NumberSkeleton<'s> {
    /// Skeleton discriminator, serialized under the key `type`.
    #[serde(rename = "type")]
    pub skeleton_type: SkeletonType,
    /// The tokens that make up the skeleton.
    pub tokens: Vec<NumberSkeletonToken<'s>>,
    /// Source location of the skeleton; left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub location: Option<Span>,
    /// Number format options, serialized as `parsedOptions`.
    pub parsed_options: JsIntlNumberFormatOptions,
}
445
/// One token of a number skeleton: a stem together with its options.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct NumberSkeletonToken<'s> {
    /// The stem of the token.
    pub stem: &'s str,
    /// The options attached to the stem.
    pub options: Vec<&'s str>,
}
452
/// Style of a date or time argument: either a plain style string or a parsed
/// date time skeleton.
///
/// The enum is `#[serde(untagged)]`, so no variant tag is written around the
/// string or the skeleton object.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
#[serde(untagged)]
pub enum DateTimeArgStyle<'s> {
    Style(&'s str),
    Skeleton(DateTimeSkeleton),
}
459
/// Discriminator stored in the `type` field of a skeleton.
///
/// The enum is `#[repr(u8)]` with implicit discriminants (`Number` = 0,
/// `DateTime` = 1) and derives `Serialize_repr`; the test in this file
/// expects `Number` to serialize as `0`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize_repr)]
#[repr(u8)]
pub enum SkeletonType {
    Number,
    DateTime,
}
466
/// A date time skeleton: its pattern string plus the date time format
/// options held in `parsed_options`. Field names are serialized in camelCase.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DateTimeSkeleton {
    /// Skeleton discriminator, serialized under the key `type`.
    #[serde(rename = "type")]
    pub skeleton_type: SkeletonType,
    /// The skeleton pattern.
    pub pattern: String,
    /// Source location of the skeleton; left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub location: Option<Span>,
    /// Date time format options, serialized as `parsedOptions`.
    pub parsed_options: JsIntlDateTimeFormatOptions,
}
477
/// The value stored for one selector in [`PluralOrSelectOptions`].
#[derive(Clone, Debug, PartialEq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PluralOrSelectOption<'s> {
    /// The nested AST of this option.
    pub value: Ast<'s>,
    /// Source location of the option; left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub location: Option<Span>,
}
485
// JSON serialization tests for the AST types.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::intl::number_format_options::JsIntlNumberFormatOptions;
    use serde_json::json;

    /// An untagged `Skeleton` serializes as the skeleton object itself, with
    /// `skeleton_type` under `type` and `parsed_options` under `parsedOptions`.
    #[test]
    fn serialize_number_arg_style_with_skeleton() {
        similar_asserts::assert_eq!(
            serde_json::to_value(NumberArgStyle::Skeleton(NumberSkeleton {
                skeleton_type: SkeletonType::Number,
                tokens: vec![NumberSkeletonToken {
                    stem: "foo",
                    options: vec!["bar", "baz"]
                }],
                location: Some(Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12))),
                parsed_options: JsIntlNumberFormatOptions::default(),
            }))
            .unwrap(),
            json!({
                "tokens": [{
                    "stem": "foo",
                    "options": [
                        "bar",
                        "baz"
                    ]
                }],
                "location": {
                    "start": {
                        "offset": 0,
                        "line": 1,
                        "column": 1,
                    },
                    "end": {
                        "offset": 11,
                        "line": 1,
                        "column": 12,
                    }
                },
                "type": 0,
                "parsedOptions": {},
            })
        );
    }

    /// An untagged `Style` serializes as a bare JSON string.
    #[test]
    fn serialize_number_arg_style_string() {
        similar_asserts::assert_eq!(
            serde_json::to_value(NumberArgStyle::Style("percent")).unwrap(),
            json!("percent")
        )
    }

    /// `PluralType` variants serialize as camelCase strings.
    #[test]
    fn serialize_plural_type() {
        similar_asserts::assert_eq!(
            serde_json::to_value(PluralType::Cardinal).unwrap(),
            json!("cardinal")
        )
    }
}
544        )
545    }
546}