icu_messageformat_parser/
parser.rs

1use crate::ast::{self, *};
2use crate::intl::date_time_format_options::JsIntlDateTimeFormatOptions;
3use crate::intl::number_format_options::JsIntlNumberFormatOptions;
4use crate::intl::options::{
5    CompactDisplay, DateTimeDisplayFormat, DateTimeMonthDisplayFormat, HourCycle, Notation,
6    NumberFormatOptionsCurrencyDisplay, NumberFormatOptionsCurrencySign,
7    NumberFormatOptionsRoundingPriority, NumberFormatOptionsSignDisplay, NumberFormatOptionsStyle,
8    NumberFormatOptionsTrailingZeroDisplay, TimeZoneNameFormat, UnitDisplay,
9};
10use crate::pattern_syntax::is_pattern_syntax;
11use langtag::LanguageTag;
12use once_cell::sync::Lazy;
13use regex::Regex as Regexp;
14use serde::{Deserialize, Serialize};
15use std::cell::Cell;
16use std::cmp;
17use std::collections::HashSet;
18use std::result;
19#[cfg(feature = "utf16")]
20use widestring::{Utf16Str, Utf16String};
21
22type Result<T> = result::Result<T, ast::Error>;
23
24pub static FRACTION_PRECISION_REGEX: Lazy<Regexp> =
25    Lazy::new(|| Regexp::new(r"^\.(?:(0+)(\*)?|(#+)|(0+)(#+))$").unwrap());
26pub static SIGNIFICANT_PRECISION_REGEX: Lazy<Regexp> =
27    Lazy::new(|| Regexp::new(r"^(@+)?(\+|#+)?[rs]?$").unwrap());
28pub static INTEGER_WIDTH_REGEX: Lazy<Regexp> =
29    Lazy::new(|| Regexp::new(r"(\*)(0+)|(#+)(0+)|(0+)").unwrap());
30pub static CONCISE_INTEGER_WIDTH_REGEX: Lazy<Regexp> =
31    Lazy::new(|| Regexp::new(r"^(0+)$").unwrap());
32
33/// https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
34/// Credit: https://github.com/caridy/intl-datetimeformat-pattern/blob/master/index.js
35/// with some tweaks
36/// TODO: This is incomplete
37pub static DATE_TIME_REGEX: Lazy<Regexp> = Lazy::new(|| {
38    Regexp::new(r"(?:[Eec]{1,6}|G{1,5}|[Qq]{1,5}|(?:[yYur]+|U{1,5})|[ML]{1,5}|d{1,2}|D{1,3}|F{1}|[abB]{1,5}|[hkHK]{1,2}|w{1,2}|W{1}|m{1,2}|s{1,2}|[zZOvVxX]{1,4})").unwrap()
39});
40
41#[derive(Clone, Debug)]
42pub struct Parser<'s> {
43    position: Cell<Position>,
44    message: &'s str,
45    options: ParserOptions,
46    #[cfg(feature = "utf16")]
47    message_utf16: Utf16String,
48}
49
50#[derive(Default, Debug, Eq, PartialEq, Clone, Serialize, Deserialize)]
51#[serde(rename_all = "camelCase")]
52pub struct ParserOptions {
53    /// Whether to treat HTML/XML tags as string literal
54    /// instead of parsing them as tag token.
55    /// When this is false we only allow simple tags without
56    /// any attributes
57    #[serde(default)]
58    pub ignore_tag: bool,
59
60    /// Should `select`, `selectordinal`, and `plural` arguments always include
61    /// the `other` case clause.
62    #[serde(default)]
63    pub requires_other_clause: bool,
64
65    /// Whether to parse number/datetime skeleton
66    /// into Intl.NumberFormatOptions and Intl.DateTimeFormatOptions, respectively
67    #[serde(default)]
68    pub should_parse_skeletons: bool,
69
70    /// Capture location info in AST
71    /// Default is false
72    #[serde(default)]
73    pub capture_location: bool,
74
75    /// Instance of Intl.Locale to resolve locale-dependent skeleton
76    #[serde(default)]
77    pub locale: Option<String>,
78}
79
80impl ParserOptions {
81    pub fn new(
82        ignore_tag: bool,
83        requires_other_clause: bool,
84        should_parse_skeletons: bool,
85        capture_location: bool,
86        locale: Option<String>,
87    ) -> Self {
88        ParserOptions {
89            ignore_tag,
90            requires_other_clause,
91            should_parse_skeletons,
92            capture_location,
93            locale,
94        }
95    }
96}
97
/// Returns `true` for Unicode whitespace as well as the two directional marks
/// U+200E (left-to-right mark) and U+200F (right-to-left mark).
fn is_whitespace(ch: char) -> bool {
    match ch {
        '\u{200e}' | '\u{200f}' => true,
        other => other.is_whitespace(),
    }
}
101
/// Returns `true` only when `ch` holds an ASCII letter (`a-z` or `A-Z`);
/// `None` and every other character yield `false`.
fn is_alpha(ch: Option<char>) -> bool {
    ch.map_or(false, |c| c.is_ascii_alphabetic())
}
109
110fn get_default_hour_symbol_from_locale(locale: &str) -> char {
111    let language_tag = LanguageTag::parse(locale).expect("Should able to parse locale tag");
112
113    // There's no built in Intl.Locale, manually read through extensions for the values we need to read
114    for extension in language_tag.extensions() {
115        //TODO: locale.hourCycles support is missing
116
117        let hour_cycle = if extension.singleton() as char == 'u' {
118            let mut ret = None;
119            let mut ext_iter = extension.iter();
120            loop {
121                let ext = ext_iter.next();
122
123                if let Some(ext) = ext {
124                    if ext == "hc" {
125                        let hour_cycle = ext_iter.next().expect("Should have hour cycle");
126                        ret = match hour_cycle.as_str() {
127                            "h11" => Some(HourCycle::H11),
128                            "h12" => Some(HourCycle::H12),
129                            "h23" => Some(HourCycle::H23),
130                            "h24" => Some(HourCycle::H24),
131                            _ => None,
132                        };
133                    }
134                } else {
135                    break;
136                }
137            }
138            ret
139        } else {
140            None
141        };
142
143        if let Some(hour_cycle) = hour_cycle {
144            return match hour_cycle {
145                HourCycle::H11 => 'K',
146                HourCycle::H12 => 'h',
147                HourCycle::H23 => 'H',
148                HourCycle::H24 => 'k',
149            };
150        }
151
152        //TODO: locale.language data generation
153    }
154
155    panic!("Should have hour cycle");
156}
157
158fn get_best_pattern(skeleton: &str, locale: &str) -> String {
159    let mut ret = "".to_string();
160
161    let skeleton_chars: Vec<_> = skeleton.chars().collect();
162    let skeleton_char_len = skeleton_chars.len();
163    let mut extra_len = 0;
164
165    for (pattern_pos, pattern_char) in skeleton.chars().enumerate() {
166        if pattern_char == 'j' {
167            if pattern_pos + 1 < skeleton_char_len
168                && skeleton_chars[pattern_pos + 1] == pattern_char
169            {
170                extra_len += 1;
171                continue;
172            } else {
173                let mut hour_len = 1 + (extra_len & 1);
174                let mut day_period_len = if extra_len < 2 {
175                    1
176                } else {
177                    3 + (extra_len >> 1)
178                };
179                let day_period_char = 'a';
180                let hour_char = get_default_hour_symbol_from_locale(locale);
181
182                if hour_char == 'H' || hour_char == 'k' {
183                    day_period_len = 0;
184                }
185
186                while day_period_len > 0 {
187                    ret = format!("{}{}", ret, day_period_char);
188                    day_period_len -= 1;
189                }
190
191                while hour_len > 0 {
192                    ret = format!("{}{}", hour_char, ret);
193                    hour_len -= 1;
194                }
195            }
196        } else if pattern_char == 'J' {
197            ret = format!("{}H", ret);
198        } else {
199            ret = format!("{}{}", ret, pattern_char);
200        }
201    }
202
203    ret
204}
205
206fn parse_date_time_skeleton(skeleton: &str) -> JsIntlDateTimeFormatOptions {
207    let mut ret = JsIntlDateTimeFormatOptions::default();
208
209    for caps in DATE_TIME_REGEX.captures_iter(skeleton) {
210        let match_str = caps.get(0).map(|m| m.as_str()).unwrap_or_default();
211        let match_len = match_str.len();
212
213        match &match_str.chars().next().unwrap_or_default() {
214            // Era
215            'G' => {
216                if match_len == 4 {
217                    ret.era = Some(UnitDisplay::Long);
218                } else if match_len == 5 {
219                    ret.era = Some(UnitDisplay::Narrow);
220                } else {
221                    ret.era = Some(UnitDisplay::Short);
222                }
223            }
224            // Year
225            'y' => {
226                if match_len == 2 {
227                    ret.year = Some(DateTimeDisplayFormat::TwoDigit);
228                } else {
229                    ret.year = Some(DateTimeDisplayFormat::Numeric);
230                }
231            }
232            'Y' | 'u' | 'U' | 'r' => {
233                panic!("`Y/u/U/r` (year) patterns are not supported, use `y` instead");
234            }
235            // Quarter
236            'q' | 'Q' => {
237                panic!("`q/Q` (quarter) patterns are not supported");
238            }
239            // Month
240            'M' | 'L' => {
241                if match_len == 1 {
242                    ret.month = Some(DateTimeMonthDisplayFormat::Numeric);
243                } else if match_len == 2 {
244                    ret.month = Some(DateTimeMonthDisplayFormat::TwoDigit);
245                } else if match_len == 3 {
246                    ret.month = Some(DateTimeMonthDisplayFormat::Short);
247                } else if match_len == 4 {
248                    ret.month = Some(DateTimeMonthDisplayFormat::Long);
249                } else if match_len == 5 {
250                    ret.month = Some(DateTimeMonthDisplayFormat::Narrow);
251                }
252            }
253            // Week
254            'w' | 'W' => {
255                panic!("`w/W` (week) patterns are not supported");
256            }
257            'd' => {
258                if match_len == 1 {
259                    ret.day = Some(DateTimeDisplayFormat::Numeric);
260                } else if match_len == 2 {
261                    ret.day = Some(DateTimeDisplayFormat::TwoDigit);
262                }
263            }
264            'D' | 'F' | 'g' => {
265                panic!("`D/F/g` (day) patterns are not supported, use `d` instead");
266            }
267            'E' => {
268                if match_len == 4 {
269                    ret.weekday = Some(UnitDisplay::Short);
270                } else if match_len == 5 {
271                    ret.weekday = Some(UnitDisplay::Narrow);
272                } else {
273                    ret.weekday = Some(UnitDisplay::Short);
274                }
275            }
276            'e' => {
277                if match_len < 4 {
278                    panic!("`e..eee` (weekday) patterns are not supported");
279                }
280
281                if match_len == 4 {
282                    ret.weekday = Some(UnitDisplay::Short);
283                } else if match_len == 5 {
284                    ret.weekday = Some(UnitDisplay::Long);
285                } else if match_len == 6 {
286                    ret.weekday = Some(UnitDisplay::Narrow);
287                } else if match_len == 7 {
288                    ret.weekday = Some(UnitDisplay::Short);
289                }
290            }
291            'c' => {
292                if match_len < 4 {
293                    panic!("`c..ccc` (weekday) patterns are not supported");
294                }
295
296                if match_len == 4 {
297                    ret.weekday = Some(UnitDisplay::Short);
298                } else if match_len == 5 {
299                    ret.weekday = Some(UnitDisplay::Long);
300                } else if match_len == 6 {
301                    ret.weekday = Some(UnitDisplay::Narrow);
302                } else if match_len == 7 {
303                    ret.weekday = Some(UnitDisplay::Short);
304                }
305            }
306            // Period
307            'a' => {
308                // AM, PM
309                ret.hour12 = Some(true)
310            }
311            'b' /*  am, pm, noon, midnight*/ | 'B' /*  flexible day periods */ => {
312                panic!("`b/B` (period) patterns are not supported, use `a` instead");
313            }
314            //Hour
315            'h' => {
316                ret.hour_cycle = Some(HourCycle::H12);
317                if match_len == 1 {
318                    ret.hour = Some(DateTimeDisplayFormat::Numeric);
319                } else if match_len == 2 {
320                    ret.hour = Some(DateTimeDisplayFormat::TwoDigit);
321                }
322            }
323            'H' => {
324                ret.hour_cycle = Some(HourCycle::H23);
325                if match_len == 1 {
326                    ret.hour = Some(DateTimeDisplayFormat::Numeric);
327                } else if match_len == 2 {
328                    ret.hour = Some(DateTimeDisplayFormat::TwoDigit);
329                }
330            }
331            'K' => {
332                ret.hour_cycle = Some(HourCycle::H11);
333                if match_len == 1 {
334                    ret.hour = Some(DateTimeDisplayFormat::Numeric);
335                } else if match_len == 2 {
336                    ret.hour = Some(DateTimeDisplayFormat::TwoDigit);
337                }
338            }
339            'k' => {
340                ret.hour_cycle = Some(HourCycle::H24);
341                if match_len == 1 {
342                    ret.hour = Some(DateTimeDisplayFormat::Numeric);
343                } else if match_len == 2 {
344                    ret.hour = Some(DateTimeDisplayFormat::TwoDigit);
345                }
346            }
347            'j' | 'J' | 'C' => {
348                panic!("`j/J/C` (hour) patterns are not supported, use `h/H/K/k` instead");
349            }
350            // Minute
351            'm' => {
352                if match_len == 1 {
353                    ret.minute = Some(DateTimeDisplayFormat::Numeric);
354                } else if match_len == 2 {
355                    ret.minute = Some(DateTimeDisplayFormat::TwoDigit);
356                }
357            }
358            // Second
359            's' => {
360                if match_len == 1 {
361                    ret.second = Some(DateTimeDisplayFormat::Numeric);
362                } else if match_len == 2 {
363                    ret.second = Some(DateTimeDisplayFormat::TwoDigit);
364                }
365            }
366            'S' | 'A' => { panic!("`S/A` (second) patterns are not supported, use `s` instead'"); }
367            // Zone
368            'z' => {
369                // 1..3, 4: specific non-location format
370                ret.time_zone_name = if match_len < 4 { Some(TimeZoneNameFormat::Short) } else {
371                    Some(TimeZoneNameFormat::Long)
372                };
373            }
374            'Z' /* 1..3, 4, 5: The ISO8601 varios formats */ |
375            'O' /* 1, 4: miliseconds in day short, long */ |
376            'v' /* 1, 4: generic non-location format */ |
377            'V' /* 1, 2, 3, 4: time zone ID or city */ |
378            'X' /* 1, 2, 3, 4: The ISO8601 varios formats */ |
379            'x' /* 1, 2, 3, 4: The ISO8601 varios formats */ => {
380                panic!("`Z/O/v/V/X/x` (timeZone) patterns are not supported, use `z` instead'");
381            }
382            _ => {}
383        }
384    }
385
386    ret
387}
388
/// Converts an ICU measure-unit identifier such as `length-meter` into the
/// bare unit name (`meter`) by dropping everything up to and including the
/// first `-`.
///
/// Input without a `-` is returned unchanged. The result is always `Some`.
fn icu_unit_to_ecma(value: &str) -> Option<String> {
    // Plain string search instead of compiling a regex on every call. The
    // newline guard keeps parity with the former `^(.*?)-` pattern, whose `.`
    // could not cross a line break.
    let unit = match value.find('-') {
        Some(dash) if !value[..dash].contains('\n') => &value[dash + 1..],
        _ => value,
    };

    Some(unit.to_string())
}
397
398fn parse_significant_precision(ret: &mut JsIntlNumberFormatOptions, value: &str) {
399    if let Some(l) = value.chars().last() {
400        if l == 'r' {
401            ret.rounding_priority = Some(NumberFormatOptionsRoundingPriority::MorePrecision);
402        } else if l == 's' {
403            ret.rounding_priority = Some(NumberFormatOptionsRoundingPriority::LessPrecision);
404        }
405    }
406
407    let cap = SIGNIFICANT_PRECISION_REGEX.captures(value);
408    if let Some(cap) = cap {
409        let g1 = cap.get(1);
410        let g2 = cap.get(2);
411
412        let g1_len = g1.map(|g| g.as_str().len() as u32);
413        let is_g2_non_str = g2.is_none()
414            || g2
415                .map(|g| g.as_str().parse::<u32>().is_ok())
416                .unwrap_or(false);
417
418        // @@@ case
419        if is_g2_non_str {
420            ret.minimum_significant_digits = g1_len;
421            ret.maximum_significant_digits = g1_len;
422        }
423        // @@@+ case
424        else if g2.map(|g| g.as_str() == "+").unwrap_or(false) {
425            ret.minimum_significant_digits = g1_len;
426        }
427        // .### case
428        else if g1.map(|g| g.as_str().starts_with("#")).unwrap_or(false) {
429            ret.maximum_significant_digits = g1_len;
430        }
431        // .@@## or .@@@ case
432        else {
433            ret.minimum_significant_digits = g1_len;
434            ret.maximum_significant_digits =
435                g1_len.map(|l| l + g2.map(|g| g.as_str().len() as u32).unwrap_or(0));
436        }
437    }
438}
439
440fn parse_sign(ret: &mut JsIntlNumberFormatOptions, value: &str) {
441    match value {
442        "sign-auto" => {
443            ret.sign_display = Some(NumberFormatOptionsSignDisplay::Auto);
444        }
445        "sign-accounting" | "()" => {
446            ret.currency_sign = Some(NumberFormatOptionsCurrencySign::Accounting);
447        }
448        "sign-always" | "+!" => {
449            ret.sign_display = Some(NumberFormatOptionsSignDisplay::Always);
450        }
451        "sign-accounting-always" | "()!" => {
452            ret.sign_display = Some(NumberFormatOptionsSignDisplay::Always);
453            ret.currency_sign = Some(NumberFormatOptionsCurrencySign::Accounting);
454        }
455        "sign-except-zero" | "+?" => {
456            ret.sign_display = Some(NumberFormatOptionsSignDisplay::ExceptZero);
457        }
458        "sign-accounting-except-zero" | "()?" => {
459            ret.sign_display = Some(NumberFormatOptionsSignDisplay::ExceptZero);
460            ret.currency_sign = Some(NumberFormatOptionsCurrencySign::Accounting);
461        }
462        "sign-never" | "+_" => {
463            ret.sign_display = Some(NumberFormatOptionsSignDisplay::Never);
464        }
465        _ => {}
466    }
467}
468
469fn parse_concise_scientific_and_engineering_stem(ret: &mut JsIntlNumberFormatOptions, stem: &str) {
470    let mut stem = stem;
471    let mut has_sign = false;
472    if stem.starts_with("EE") {
473        ret.notation = Some(Notation::Engineering);
474        stem = &stem[2..];
475        has_sign = true;
476    } else if stem.starts_with("E") {
477        ret.notation = Some(Notation::Scientific);
478        stem = &stem[1..];
479        has_sign = true;
480    }
481
482    if has_sign {
483        let sign_display = &stem[0..2];
484        match sign_display {
485            "+!" => {
486                ret.sign_display = Some(NumberFormatOptionsSignDisplay::Always);
487                stem = &stem[2..];
488            }
489            "+?" => {
490                ret.sign_display = Some(NumberFormatOptionsSignDisplay::ExceptZero);
491                stem = &stem[2..];
492            }
493            _ => {}
494        }
495
496        if !CONCISE_INTEGER_WIDTH_REGEX.is_match(stem) {
497            panic!("Malformed concise eng/scientific notation");
498        }
499
500        ret.minimum_integer_digits = Some(stem.len() as u32);
501    }
502}
503
504fn parse_number_skeleton(skeleton: &Vec<NumberSkeletonToken>) -> JsIntlNumberFormatOptions {
505    let mut ret = JsIntlNumberFormatOptions::default();
506    for token in skeleton {
507        match token.stem {
508            "percent" | "%" => {
509                ret.style = Some(NumberFormatOptionsStyle::Percent);
510                continue;
511            }
512            "%x100" => {
513                ret.style = Some(NumberFormatOptionsStyle::Percent);
514                ret.scale = Some(100.0);
515                continue;
516            }
517            "currency" => {
518                ret.style = Some(NumberFormatOptionsStyle::Currency);
519                ret.currency = Some(token.options[0].to_string());
520                continue;
521            }
522            "group-off" | ",_" => {
523                ret.use_grouping = Some(false);
524                continue;
525            }
526            "precision-integer" | "." => {
527                ret.maximum_fraction_digits = Some(0);
528                continue;
529            }
530            "measure-unit" | "unit" => {
531                ret.style = Some(NumberFormatOptionsStyle::Unit);
532                ret.unit = icu_unit_to_ecma(token.options[0]);
533                continue;
534            }
535            "compact-short" | "K" => {
536                ret.notation = Some(Notation::Compact);
537                ret.compact_display = Some(CompactDisplay::Short);
538                continue;
539            }
540            "compact-long" | "KK" => {
541                ret.notation = Some(Notation::Compact);
542                ret.compact_display = Some(CompactDisplay::Long);
543                continue;
544            }
545            "scientific" => {
546                ret.notation = Some(Notation::Scientific);
547                for opt in &token.options {
548                    parse_sign(&mut ret, opt);
549                }
550                continue;
551            }
552            "engineering" => {
553                ret.notation = Some(Notation::Engineering);
554                for opt in &token.options {
555                    parse_sign(&mut ret, opt);
556                }
557                continue;
558            }
559            "notation-simple" => {
560                ret.notation = Some(Notation::Standard);
561                continue;
562            }
563            // https://github.com/unicode-org/icu/blob/master/icu4c/source/i18n/unicode/unumberformatter.h
564            "unit-width-narrow" => {
565                ret.currency_display = Some(NumberFormatOptionsCurrencyDisplay::NarrowSymbol);
566                ret.unit_display = Some(UnitDisplay::Narrow);
567                continue;
568            }
569            "unit-width-short" => {
570                ret.currency_display = Some(NumberFormatOptionsCurrencyDisplay::Code);
571                ret.unit_display = Some(UnitDisplay::Short);
572                continue;
573            }
574            "unit-width-full-name" => {
575                ret.currency_display = Some(NumberFormatOptionsCurrencyDisplay::Name);
576                ret.unit_display = Some(UnitDisplay::Long);
577                continue;
578            }
579            "unit-width-iso-code" => {
580                ret.currency_display = Some(NumberFormatOptionsCurrencyDisplay::Symbol);
581                continue;
582            }
583            "scale" => {
584                ret.scale = token.options[0].parse().ok();
585                continue;
586            }
587            "integer-width" => {
588                let cap = INTEGER_WIDTH_REGEX.captures(token.options[0]);
589                if let Some(cap) = cap {
590                    if cap.get(1).is_some() {
591                        ret.minimum_integer_digits = cap.get(2).map(|c| c.as_str().len() as u32);
592                    } else if cap.get(3).is_some() && cap.get(4).is_some() {
593                        panic!("We currently do not support maximum integer digits");
594                    } else if cap.get(5).is_some() {
595                        panic!("We currently do not support exact integer digits");
596                    }
597                }
598                continue;
599            }
600            _ => {
601                //noop
602            }
603        }
604
605        // https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html#integer-width
606        if CONCISE_INTEGER_WIDTH_REGEX.is_match(token.stem) {
607            ret.minimum_integer_digits = Some(token.stem.len() as u32);
608            continue;
609        }
610
611        if FRACTION_PRECISION_REGEX.is_match(token.stem) {
612            // Precision
613            // https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html#fraction-precision
614            // precision-integer case
615            let caps = FRACTION_PRECISION_REGEX.captures(token.stem);
616            if let Some(caps) = caps {
617                let g1_len = caps.get(1).map(|g| g.as_str().len() as u32);
618                let g2 = caps.get(2);
619                let g3 = caps.get(3);
620                let g4 = caps.get(4);
621                let g5 = caps.get(5);
622
623                // .000* case (before ICU67 it was .000+)
624                if g2.map(|g| g.as_str() == "*").unwrap_or(false) {
625                    ret.minimum_fraction_digits = g1_len;
626                }
627                // .### case
628                else if g3.map(|g| g.as_str().starts_with("#")).unwrap_or(false) {
629                    ret.maximum_fraction_digits = g3.map(|g| g.as_str().len() as u32);
630                }
631                // .00## case
632                else if g4.is_some() && g5.is_some() {
633                    ret.minimum_fraction_digits = g4.map(|g| g.as_str().len() as u32);
634                    ret.maximum_fraction_digits =
635                        Some(g4.unwrap().as_str().len() as u32 + g5.unwrap().as_str().len() as u32);
636                } else {
637                    ret.minimum_fraction_digits = g1_len;
638                    ret.maximum_fraction_digits = g1_len;
639                }
640
641                let opt = token.options.get(0);
642                // https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html#trailing-zero-display
643                if let Some(opt) = opt {
644                    if *opt == "w" {
645                        ret.trailing_zero_display =
646                            Some(NumberFormatOptionsTrailingZeroDisplay::StripIfInteger);
647                    } else {
648                        parse_significant_precision(&mut ret, opt);
649                    }
650                }
651            }
652            continue;
653        }
654
655        // https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html#significant-digits-precision
656        if SIGNIFICANT_PRECISION_REGEX.is_match(token.stem) {
657            parse_significant_precision(&mut ret, token.stem);
658            continue;
659        }
660
661        parse_sign(&mut ret, token.stem);
662        parse_concise_scientific_and_engineering_stem(&mut ret, token.stem);
663    }
664    ret
665}
666
667impl<'s> Parser<'s> {
668    pub fn new(message: &'s str, options: &ParserOptions) -> Parser<'s> {
669        Parser {
670            message,
671            #[cfg(feature = "utf16")]
672            message_utf16: Utf16String::from(message),
673            position: Cell::new(Position {
674                offset: 0,
675                line: 1,
676                column: 1,
677            }),
678            options: options.clone(),
679        }
680    }
681
682    pub fn parse(&mut self) -> Result<Ast> {
683        assert_eq!(self.offset(), 0, "parser can only be used once");
684        self.parse_message(0, "", false)
685    }
686
687    /// # Arguments
688    ///
689    /// * `nesting_level` - The nesting level of the message. This can be positive if the message
690    ///   is nested inside the plural or select argument's selector clause.
691    /// * `parent_arg_type` - If nested, this is the parent plural or selector's argument type.
692    ///   Otherwise this should just be an empty string.
693    /// * `expecting_close_tag` - If true, this message is directly or indirectly nested inside
694    ///   between a pair of opening and closing tags. The nested message will not parse beyond
695    ///   the closing tag boundary.
696    fn parse_message(
697        &self,
698        nesting_level: usize,
699        parent_arg_type: &str,
700        expecting_close_tag: bool,
701    ) -> Result<Ast> {
702        let mut elements: Vec<AstElement> = vec![];
703
704        while !self.is_eof() {
705            elements.push(match self.char() {
706                '{' => self.parse_argument(nesting_level, expecting_close_tag)?,
707                '}' if nesting_level > 0 => break,
708                '#' if matches!(parent_arg_type, "plural" | "selectordinal") => {
709                    let position = self.position();
710                    self.bump();
711                    AstElement::Pound(Span::new(position, self.position()))
712                }
713                '<' if !self.options.ignore_tag && self.peek() == Some('/') => {
714                    if expecting_close_tag {
715                        break;
716                    } else {
717                        return Err(self.error(
718                            ErrorKind::UnmatchedClosingTag,
719                            Span::new(self.position(), self.position()),
720                        ));
721                    }
722                }
723                '<' if !self.options.ignore_tag && is_alpha(self.peek()) => {
724                    self.parse_tag(nesting_level, parent_arg_type)?
725                }
726                _ => self.parse_literal(nesting_level, parent_arg_type)?,
727            })
728        }
729
730        Ok(elements)
731    }
732
733    fn position(&self) -> Position {
734        self.position.get()
735    }
736
737    /// A tag name must start with an ASCII lower case letter. The grammar is based on the
738    /// [custom element name][] except that a dash is NOT always mandatory and uppercase letters
739    /// are accepted:
740    ///
741    /// ```ignore
742    /// tag ::= "<" tagName (whitespace)* "/>" | "<" tagName (whitespace)* ">" message "</" tagName (whitespace)* ">"
743    /// tagName ::= [a-z] (PENChar)*
744    /// PENChar ::=
745    ///     "-" | "." | [0-9] | "_" | [a-z] | [A-Z] | #xB7 | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x37D] |
746    ///     [#x37F-#x1FFF] | [#x200C-#x200D] | [#x203F-#x2040] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
747    ///     [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
748    /// ```
749    ///
750    /// [custom element name]: https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
751    fn parse_tag(&self, nesting_level: usize, parent_arg_type: &str) -> Result<AstElement> {
752        let start_position = self.position();
753        self.bump(); // '<'
754
755        let tag_name = self.parse_tag_name();
756        self.bump_space();
757
758        if self.bump_if("/>") {
759            // Self closing tag
760            Ok(AstElement::Literal {
761                value: format!("<{}/>", tag_name),
762                span: if self.options.capture_location {
763                    Some(Span::new(start_position, self.position()))
764                } else {
765                    None
766                },
767            })
768        } else if self.bump_if(">") {
769            let children = self.parse_message(nesting_level + 1, parent_arg_type, true)?;
770
771            // Expecting a close tag
772            let end_tag_start_position = self.position();
773
774            if self.bump_if("</") {
775                if self.is_eof() || !is_alpha(Some(self.char())) {
776                    return Err(self.error(
777                        ErrorKind::InvalidTag,
778                        Span::new(end_tag_start_position, self.position()),
779                    ));
780                }
781
782                let closing_tag_name_start_position = self.position();
783                let closing_tag_name = self.parse_tag_name();
784                if tag_name != closing_tag_name {
785                    return Err(self.error(
786                        ErrorKind::UnmatchedClosingTag,
787                        Span::new(closing_tag_name_start_position, self.position()),
788                    ));
789                }
790
791                self.bump_space();
792                if !self.bump_if(">") {
793                    let span = Span::new(end_tag_start_position, self.position());
794                    return Err(self.error(ErrorKind::InvalidTag, span));
795                }
796
797                Ok(AstElement::Tag {
798                    value: tag_name,
799                    span: if self.options.capture_location {
800                        Some(Span::new(start_position, self.position()))
801                    } else {
802                        None
803                    },
804                    children: Box::new(children),
805                })
806            } else {
807                Err(self.error(
808                    ErrorKind::UnclosedTag,
809                    Span::new(start_position, self.position()),
810                ))
811            }
812        } else {
813            Err(self.error(
814                ErrorKind::InvalidTag,
815                Span::new(start_position, self.position()),
816            ))
817        }
818    }
819
820    fn parse_tag_name(&self) -> &str {
821        let start_offset = self.offset();
822
823        self.bump(); // the first tag name character
824        while !self.is_eof() && is_potential_element_name_char(self.char()) {
825            self.bump();
826        }
827
828        &self.message[start_offset..self.offset()]
829    }
830
831    fn parse_literal(&self, nesting_level: usize, parent_arg_type: &str) -> Result<AstElement> {
832        let start = self.position();
833
834        let mut value = String::new();
835        loop {
836            if self.bump_if("''") {
837                value.push('\'');
838            } else if let Some(fragment) = self.try_parse_quote(parent_arg_type) {
839                value.push_str(&fragment);
840            } else if let Some(fragment) = self.try_parse_unquoted(nesting_level, parent_arg_type) {
841                value.push(fragment);
842            } else if let Some(fragment) = self.try_parse_left_angle_bracket() {
843                value.push(fragment);
844            } else {
845                break;
846            }
847        }
848
849        let span = Span::new(start, self.position());
850        Ok(AstElement::Literal {
851            span: if self.options.capture_location {
852                Some(span)
853            } else {
854                None
855            },
856            value,
857        })
858    }
859
860    /// Starting with ICU 4.8, an ASCII apostrophe only starts quoted text if it immediately precedes
861    /// a character that requires quoting (that is, "only where needed"), and works the same in
862    /// nested messages as on the top level of the pattern. The new behavior is otherwise compatible.
863    fn try_parse_quote(&self, parent_arg_type: &str) -> Option<String> {
864        if self.is_eof() || self.char() != '\'' {
865            return None;
866        }
867
868        // Parse escaped char following the apostrophe, or early return if there is no escaped char.
869        // Check if is valid escaped character
870        match self.peek() {
871            Some('{') | Some('<') | Some('>') | Some('}') => (),
872            Some('#') if matches!(parent_arg_type, "plural" | "selectordinal") => (),
873            _ => {
874                return None;
875            }
876        }
877
878        self.bump(); // apostrophe
879        let mut value = self.char().to_string(); // escaped char
880        self.bump();
881
882        // read chars until the optional closing apostrophe is found
883        loop {
884            if self.is_eof() {
885                break;
886            }
887            match self.char() {
888                '\'' if self.peek() == Some('\'') => {
889                    value.push('\'');
890                    // Bump one more time because we need to skip 2 characters.
891                    self.bump();
892                }
893                '\'' => {
894                    // Optional closing apostrophe.
895                    self.bump();
896                    break;
897                }
898                c => value.push(c),
899            }
900            self.bump();
901        }
902
903        Some(value)
904    }
905
906    fn try_parse_unquoted(&self, nesting_level: usize, parent_arg_type: &str) -> Option<char> {
907        if self.is_eof() {
908            return None;
909        }
910        match self.char() {
911            '<' | '{' => None,
912            '#' if parent_arg_type == "plural" || parent_arg_type == "selectordinal" => None,
913            '}' if nesting_level > 0 => None,
914            c => {
915                self.bump();
916                Some(c)
917            }
918        }
919    }
920
921    fn try_parse_left_angle_bracket(&self) -> Option<char> {
922        if !self.is_eof()
923            && self.char() == '<'
924            && (self.options.ignore_tag
925                // If at the opening tag or closing tag position, bail.
926                || !(matches!(self.peek(), Some(c) if is_alpha(Some(c)) || c == '/')))
927        {
928            self.bump(); // `<`
929            Some('<')
930        } else {
931            None
932        }
933    }
934
935    fn parse_argument(
936        &self,
937        nesting_level: usize,
938        expecting_close_tag: bool,
939    ) -> Result<AstElement> {
940        let opening_brace_position = self.position();
941        self.bump(); // `{`
942
943        self.bump_space();
944
945        if self.is_eof() {
946            return Err(self.error(
947                ErrorKind::ExpectArgumentClosingBrace,
948                Span::new(opening_brace_position, self.position()),
949            ));
950        }
951
952        if self.char() == '}' {
953            self.bump();
954            return Err(self.error(
955                ErrorKind::EmptyArgument,
956                Span::new(opening_brace_position, self.position()),
957            ));
958        }
959
960        // argument name
961        let value = self.parse_identifier_if_possible().0.to_string();
962        if value.is_empty() {
963            return Err(self.error(
964                ErrorKind::MalformedArgument,
965                Span::new(opening_brace_position, self.position()),
966            ));
967        }
968
969        self.bump_space();
970
971        if self.is_eof() {
972            return Err(self.error(
973                ErrorKind::ExpectArgumentClosingBrace,
974                Span::new(opening_brace_position, self.position()),
975            ));
976        }
977
978        match self.char() {
979            // Simple argument: `{name}`
980            '}' => {
981                self.bump(); // `}`
982
983                Ok(AstElement::Argument {
984                    // value does not include the opening and closing braces.
985                    value,
986                    span: if self.options.capture_location {
987                        Some(Span::new(opening_brace_position, self.position()))
988                    } else {
989                        None
990                    },
991                })
992            }
993
994            // Argument with options: `{name, format, ...}`
995            ',' => {
996                self.bump(); // ','
997                self.bump_space();
998
999                if self.is_eof() {
1000                    return Err(self.error(
1001                        ErrorKind::ExpectArgumentClosingBrace,
1002                        Span::new(opening_brace_position, self.position()),
1003                    ));
1004                }
1005
1006                self.parse_argument_options(
1007                    nesting_level,
1008                    expecting_close_tag,
1009                    value,
1010                    opening_brace_position,
1011                )
1012            }
1013
1014            _ => Err(self.error(
1015                ErrorKind::MalformedArgument,
1016                Span::new(opening_brace_position, self.position()),
1017            )),
1018        }
1019    }
1020
1021    fn parse_argument_options(
1022        &'s self,
1023        nesting_level: usize,
1024        expecting_close_tag: bool,
1025        value: String,
1026        opening_brace_position: Position,
1027    ) -> Result<AstElement> {
1028        // Parse this range:
1029        // {name, type, style}
1030        //        ^---^
1031        let type_starting_position = self.position();
1032        #[cfg(feature = "utf16")]
1033        let arg_type_utf16 = self.parse_identifier_if_possible().0;
1034        #[cfg(feature = "utf16")]
1035        let arg_type = arg_type_utf16.to_string();
1036        #[cfg(feature = "utf16")]
1037        let arg_type = arg_type.as_str();
1038
1039        #[cfg(not(feature = "utf16"))]
1040        let arg_type = self.parse_identifier_if_possible().0;
1041        let type_end_position = self.position();
1042
1043        match arg_type {
1044            "" => {
1045                // Expecting a style string number, date, time, plural, selectordinal, or select.
1046                Err(self.error(
1047                    ErrorKind::ExpectArgumentType,
1048                    Span::new(type_starting_position, type_end_position),
1049                ))
1050            }
1051
1052            "number" | "date" | "time" => {
1053                // Parse this range:
1054                // {name, number, style}
1055                //              ^-------^
1056                self.bump_space();
1057
1058                let style_and_span = if self.bump_if(",") {
1059                    self.bump_space();
1060
1061                    let style_start_position = self.position();
1062                    let style = self.parse_simple_arg_style_if_possible()?.trim_end();
1063                    if style.is_empty() {
1064                        return Err(self.error(
1065                            ErrorKind::ExpectArgumentStyle,
1066                            Span::new(self.position(), self.position()),
1067                        ));
1068                    }
1069
1070                    let style_span = Span::new(style_start_position, self.position());
1071                    Some((style, style_span))
1072                } else {
1073                    None
1074                };
1075
1076                self.try_parse_argument_close(opening_brace_position)?;
1077                let span = Span::new(opening_brace_position, self.position());
1078
1079                // Extract style or skeleton
1080                if let Some((style, style_span)) = style_and_span {
1081                    if style.starts_with("::") {
1082                        // Skeleton starts with `::`.
1083                        let skeleton = style[2..].trim_start();
1084
1085                        Ok(match arg_type {
1086                            "number" => {
1087                                let skeleton = parse_number_skeleton_from_string(
1088                                    skeleton,
1089                                    style_span,
1090                                    self.options.should_parse_skeletons,
1091                                    self.options.capture_location,
1092                                )
1093                                .map_err(|kind| self.error(kind, style_span))?;
1094
1095                                AstElement::Number {
1096                                    value,
1097                                    span: if self.options.capture_location {
1098                                        Some(span)
1099                                    } else {
1100                                        None
1101                                    },
1102                                    style: Some(NumberArgStyle::Skeleton(skeleton)),
1103                                }
1104                            }
1105                            _ => {
1106                                if skeleton.is_empty() {
1107                                    return Err(self.error(ErrorKind::ExpectDateTimeSkeleton, span));
1108                                }
1109
1110                                let pattern = if let Some(locale) = &self.options.locale {
1111                                    get_best_pattern(skeleton, &locale)
1112                                } else {
1113                                    skeleton.to_string()
1114                                };
1115
1116                                let parsed_options = if self.options.should_parse_skeletons {
1117                                    parse_date_time_skeleton(&pattern)
1118                                } else {
1119                                    Default::default()
1120                                };
1121
1122                                let style = Some(DateTimeArgStyle::Skeleton(DateTimeSkeleton {
1123                                    skeleton_type: SkeletonType::DateTime,
1124                                    pattern,
1125                                    location: if self.options.capture_location {
1126                                        Some(style_span)
1127                                    } else {
1128                                        None
1129                                    },
1130                                    parsed_options,
1131                                }));
1132                                if arg_type == "date" {
1133                                    AstElement::Date {
1134                                        value,
1135                                        span: if self.options.capture_location {
1136                                            Some(span)
1137                                        } else {
1138                                            None
1139                                        },
1140                                        style,
1141                                    }
1142                                } else {
1143                                    AstElement::Time {
1144                                        value,
1145                                        span: if self.options.capture_location {
1146                                            Some(span)
1147                                        } else {
1148                                            None
1149                                        },
1150                                        style,
1151                                    }
1152                                }
1153                            }
1154                        })
1155                    } else {
1156                        // Regular style
1157                        Ok(match arg_type {
1158                            "number" => AstElement::Number {
1159                                value,
1160                                span: if self.options.capture_location {
1161                                    Some(span)
1162                                } else {
1163                                    None
1164                                },
1165                                style: Some(NumberArgStyle::Style(style)),
1166                            },
1167                            "date" => AstElement::Date {
1168                                value,
1169                                span: if self.options.capture_location {
1170                                    Some(span)
1171                                } else {
1172                                    None
1173                                },
1174                                style: Some(DateTimeArgStyle::Style(style)),
1175                            },
1176                            _ => AstElement::Time {
1177                                value,
1178                                span: if self.options.capture_location {
1179                                    Some(span)
1180                                } else {
1181                                    None
1182                                },
1183                                style: Some(DateTimeArgStyle::Style(style)),
1184                            },
1185                        })
1186                    }
1187                } else {
1188                    // No style
1189                    Ok(match arg_type {
1190                        "number" => AstElement::Number {
1191                            value,
1192                            span: if self.options.capture_location {
1193                                Some(span)
1194                            } else {
1195                                None
1196                            },
1197                            style: None,
1198                        },
1199                        "date" => AstElement::Date {
1200                            value,
1201                            span: if self.options.capture_location {
1202                                Some(span)
1203                            } else {
1204                                None
1205                            },
1206                            style: None,
1207                        },
1208                        _ => AstElement::Time {
1209                            value,
1210                            span: if self.options.capture_location {
1211                                Some(span)
1212                            } else {
1213                                None
1214                            },
1215                            style: None,
1216                        },
1217                    })
1218                }
1219            }
1220
1221            "plural" | "selectordinal" | "select" => {
1222                // Parse this range:
1223                // {name, plural, options}
1224                //              ^---------^
1225                let type_end_position = self.position();
1226
1227                self.bump_space();
1228                if !self.bump_if(",") {
1229                    return Err(self.error(
1230                        ErrorKind::ExpectSelectArgumentOptions,
1231                        Span::new(type_end_position, type_end_position),
1232                    ));
1233                }
1234                self.bump_space();
1235
1236                // Parse offset:
1237                // {name, plural, offset:1, options}
1238                //                ^-----^
1239                //
1240                // or the first option:
1241                //
1242                // {name, plural, one {...} other {...}}
1243                //                ^--^
1244                let mut identifier_and_span = self.parse_identifier_if_possible();
1245
1246                let plural_offset = if arg_type != "select" && identifier_and_span.0 == "offset" {
1247                    if !self.bump_if(":") {
1248                        return Err(self.error(
1249                            ErrorKind::ExpectPluralArgumentOffsetValue,
1250                            Span::new(self.position(), self.position()),
1251                        ));
1252                    }
1253                    self.bump_space();
1254                    let offset = self.try_parse_decimal_integer(
1255                        ErrorKind::ExpectPluralArgumentOffsetValue,
1256                        ErrorKind::InvalidPluralArgumentOffsetValue,
1257                    )?;
1258
1259                    // Parse another identifier for option parsing
1260                    self.bump_space();
1261                    identifier_and_span = self.parse_identifier_if_possible();
1262
1263                    offset
1264                } else {
1265                    0
1266                };
1267
1268                #[cfg(feature = "utf16")]
1269                let options = self.try_parse_plural_or_select_options(
1270                    nesting_level,
1271                    arg_type_utf16,
1272                    expecting_close_tag,
1273                    identifier_and_span,
1274                )?;
1275                #[cfg(not(feature = "utf16"))]
1276                let options = self.try_parse_plural_or_select_options(
1277                    nesting_level,
1278                    arg_type,
1279                    expecting_close_tag,
1280                    identifier_and_span,
1281                )?;
1282                self.try_parse_argument_close(opening_brace_position)?;
1283
1284                let span = Span::new(opening_brace_position, self.position());
1285                match arg_type {
1286                    "select" => Ok(AstElement::Select {
1287                        value,
1288                        span: if self.options.capture_location {
1289                            Some(span)
1290                        } else {
1291                            None
1292                        },
1293                        options,
1294                    }),
1295                    _ => Ok(AstElement::Plural {
1296                        value,
1297                        span: if self.options.capture_location {
1298                            Some(span)
1299                        } else {
1300                            None
1301                        },
1302                        options,
1303                        offset: plural_offset,
1304                        plural_type: if arg_type == "plural" {
1305                            PluralType::Cardinal
1306                        } else {
1307                            PluralType::Ordinal
1308                        },
1309                    }),
1310                }
1311            }
1312
1313            _ => Err(self.error(
1314                ErrorKind::InvalidArgumentType,
1315                Span::new(type_starting_position, type_end_position),
1316            )),
1317        }
1318    }
1319
1320    /// * `nesting_level` - the current nesting level of messages.
1321    ///   This can be positive when parsing message fragment in select or plural argument options.
1322    /// * `parent_arg_type` - the parent argument's type.
1323    /// * `parsed_first_identifier` - if provided, this is the first identifier-like selector of the
1324    ///   argument. It is a by-product of a previous parsing attempt.
1325    /// * `expecting_close_tag` - If true, this message is directly or indirectly nested inside
1326    ///   between a pair of opening and closing tags. The nested message will not parse beyond
1327    ///   the closing tag boundary.    ///
1328    fn try_parse_plural_or_select_options(
1329        &'s self,
1330        nesting_level: usize,
1331        #[cfg(feature = "utf16")] parent_arg_type: &'s Utf16Str,
1332        #[cfg(not(feature = "utf16"))] parent_arg_type: &'s str,
1333        expecting_close_tag: bool,
1334        #[cfg(feature = "utf16")] parsed_first_identifier: (&'s Utf16Str, Span),
1335        #[cfg(not(feature = "utf16"))] parsed_first_identifier: (&'s str, Span),
1336    ) -> Result<PluralOrSelectOptions> {
1337        let mut has_other_clause = false;
1338
1339        let mut options = vec![];
1340        let mut selectors_parsed = HashSet::new();
1341        let (mut selector, mut selector_span) = parsed_first_identifier;
1342        // Parse:
1343        // one {one apple}
1344        // ^--^
1345        loop {
1346            if selector.is_empty() {
1347                let start_position = self.position();
1348                if parent_arg_type != "select" && self.bump_if("=") {
1349                    // Try parse `={number}` selector
1350                    self.try_parse_decimal_integer(
1351                        ErrorKind::ExpectPluralArgumentSelector,
1352                        ErrorKind::InvalidPluralArgumentSelector,
1353                    )?;
1354                    selector_span = Span::new(start_position, self.position());
1355                    #[cfg(feature = "utf16")]
1356                    {
1357                        selector = &self.message_utf16[start_position.offset..self.offset()];
1358                    }
1359                    #[cfg(not(feature = "utf16"))]
1360                    {
1361                        selector = &self.message[start_position.offset..self.offset()];
1362                    }
1363                } else {
1364                    // TODO: check to make sure that the plural category is valid.
1365                    break;
1366                }
1367            }
1368
1369            // Duplicate selector clauses
1370            if selectors_parsed.contains(selector) {
1371                return Err(self.error(
1372                    if parent_arg_type == "select" {
1373                        ErrorKind::DuplicateSelectArgumentSelector
1374                    } else {
1375                        ErrorKind::DuplicatePluralArgumentSelector
1376                    },
1377                    selector_span,
1378                ));
1379            }
1380
1381            if selector == "other" {
1382                has_other_clause = true;
1383            }
1384
1385            // Parse:
1386            // one {one apple}
1387            //     ^----------^
1388            self.bump_space();
1389            let opening_brace_position = self.position();
1390            if !self.bump_if("{") {
1391                return Err(self.error(
1392                    if parent_arg_type == "select" {
1393                        ErrorKind::ExpectSelectArgumentSelectorFragment
1394                    } else {
1395                        ErrorKind::ExpectPluralArgumentSelectorFragment
1396                    },
1397                    Span::new(self.position(), self.position()),
1398                ));
1399            }
1400
1401            let fragment = self.parse_message(
1402                nesting_level + 1,
1403                parent_arg_type.to_string().as_str(),
1404                expecting_close_tag,
1405            )?;
1406            self.try_parse_argument_close(opening_brace_position)?;
1407
1408            options.push((
1409                selector,
1410                PluralOrSelectOption {
1411                    value: fragment,
1412                    location: if self.options.capture_location {
1413                        Some(Span::new(opening_brace_position, self.position()))
1414                    } else {
1415                        None
1416                    },
1417                },
1418            ));
1419            // Keep track of the existing selectors
1420            selectors_parsed.insert(selector);
1421
1422            // Prep next selector clause.
1423            self.bump_space();
1424            // 🤷‍♂️ Destructure assignment is NOT yet supported by Rust.
1425            let _identifier_and_span = self.parse_identifier_if_possible();
1426            selector = _identifier_and_span.0;
1427            selector_span = _identifier_and_span.1;
1428        }
1429
1430        if options.is_empty() {
1431            return Err(self.error(
1432                match parent_arg_type.to_string().as_str() {
1433                    "select" => ErrorKind::ExpectSelectArgumentSelector,
1434                    _ => ErrorKind::ExpectPluralArgumentSelector,
1435                },
1436                Span::new(self.position(), self.position()),
1437            ));
1438        }
1439
1440        if self.options.requires_other_clause && !has_other_clause {
1441            return Err(self.error(
1442                ErrorKind::MissingOtherClause,
1443                Span::new(self.position(), self.position()),
1444            ));
1445        }
1446
1447        Ok(PluralOrSelectOptions(options))
1448    }
1449
1450    fn try_parse_decimal_integer(
1451        &self,
1452        expect_number_error: ErrorKind,
1453        invalid_number_error: ErrorKind,
1454    ) -> Result<i64> {
1455        let mut sign = 1;
1456        let start_position = self.position();
1457
1458        if self.bump_if("+") {
1459        } else if self.bump_if("-") {
1460            sign = -1;
1461        }
1462
1463        let mut digits = String::new();
1464        while !self.is_eof() && self.char().is_digit(10) {
1465            digits.push(self.char());
1466            self.bump();
1467        }
1468
1469        let span = Span::new(start_position, self.position());
1470
1471        if self.is_eof() {
1472            return Err(self.error(expect_number_error, span));
1473        }
1474
1475        digits
1476            .parse::<i64>()
1477            .map(|x| x * sign)
1478            .map_err(|_| self.error(invalid_number_error, span))
1479    }
1480
1481    /// See: https://github.com/unicode-org/icu/blob/af7ed1f6d2298013dc303628438ec4abe1f16479/icu4c/source/common/messagepattern.cpp#L659
1482    fn parse_simple_arg_style_if_possible(&self) -> Result<&str> {
1483        let mut nested_braces = 0;
1484
1485        let start_position = self.position();
1486        while !self.is_eof() {
1487            match self.char() {
1488                '\'' => {
1489                    // Treat apostrophe as quoting but include it in the style part.
1490                    // Find the end of the quoted literal text.
1491                    self.bump();
1492                    let apostrophe_position = self.position();
1493                    if !self.bump_until('\'') {
1494                        return Err(self.error(
1495                            ErrorKind::UnclosedQuoteInArgumentStyle,
1496                            Span::new(apostrophe_position, self.position()),
1497                        ));
1498                    }
1499                    self.bump();
1500                }
1501                '{' => {
1502                    nested_braces += 1;
1503                    self.bump();
1504                }
1505                '}' => {
1506                    if nested_braces > 0 {
1507                        nested_braces -= 1;
1508                    } else {
1509                        break;
1510                    }
1511                }
1512                _ => {
1513                    self.bump();
1514                }
1515            }
1516        }
1517
1518        Ok(&self.message[start_position.offset..self.offset()])
1519    }
1520
1521    fn try_parse_argument_close(&self, opening_brace_position: Position) -> Result<()> {
1522        // Parse: {value, number, ::currency/GBP }
1523        //                                       ^^
1524        if self.is_eof() {
1525            return Err(self.error(
1526                ErrorKind::ExpectArgumentClosingBrace,
1527                Span::new(opening_brace_position, self.position()),
1528            ));
1529        }
1530
1531        if self.char() != '}' {
1532            return Err(self.error(
1533                ErrorKind::ExpectArgumentClosingBrace,
1534                Span::new(opening_brace_position, self.position()),
1535            ));
1536        }
1537        self.bump(); // `}`
1538
1539        Ok(())
1540    }
1541
1542    fn parse_identifier_if_possible_inner(&self) -> Span {
1543        let starting_position = self.position();
1544
1545        while !self.is_eof() && !is_whitespace(self.char()) && !is_pattern_syntax(self.char()) {
1546            self.bump();
1547        }
1548
1549        let end_position = self.position();
1550        Span::new(starting_position, end_position)
1551    }
1552
1553    /// Advance the parser until the end of the identifier, if it is currently on
1554    /// an identifier character. Return an empty string otherwise.
1555    #[cfg(feature = "utf16")]
1556    fn parse_identifier_if_possible(&self) -> (&Utf16Str, Span) {
1557        let span = self.parse_identifier_if_possible_inner();
1558        (
1559            &self.message_utf16[span.start.offset..span.end.offset],
1560            span,
1561        )
1562    }
1563
1564    #[cfg(not(feature = "utf16"))]
1565    fn parse_identifier_if_possible(&self) -> (&str, Span) {
1566        let span = self.parse_identifier_if_possible_inner();
1567        (&self.message[span.start.offset..span.end.offset], span)
1568    }
1569
1570    fn error(&self, kind: ErrorKind, span: Span) -> ast::Error {
1571        ast::Error {
1572            kind,
1573            message: self.message.to_string(),
1574            location: if self.options.capture_location {
1575                Some(span)
1576            } else {
1577                None
1578            },
1579        }
1580    }
1581
1582    fn offset(&self) -> usize {
1583        self.position().offset
1584    }
1585
1586    /// Return the character at the current position of the parser.
1587    ///
1588    /// This panics if the current position does not point to a valid char.
1589    fn char(&self) -> char {
1590        self.char_at(self.offset())
1591    }
1592
1593    /// Return the character at the given position.
1594    ///
1595    /// This panics if the given position does not point to a valid char.
1596    fn char_at(&self, i: usize) -> char {
1597        #[cfg(feature = "utf16")]
1598        let message = &self.message_utf16[i..].to_string();
1599        #[cfg(feature = "utf16")]
1600        let message = message.as_str();
1601
1602        #[cfg(not(feature = "utf16"))]
1603        let message = &self.message[i..];
1604
1605        message
1606            .chars()
1607            .next()
1608            .unwrap_or_else(|| panic!("expected char at offset {}", i))
1609    }
1610
1611    /// Bump the parser to the next Unicode scalar value.
1612    fn bump(&self) {
1613        if self.is_eof() {
1614            return;
1615        }
1616        let Position {
1617            mut offset,
1618            mut line,
1619            mut column,
1620        } = self.position();
1621        let ch = self.char();
1622        if ch == '\n' {
1623            line = line.checked_add(1).unwrap();
1624            column = 1;
1625        } else {
1626            column = column.checked_add(1).unwrap();
1627        }
1628
1629        #[cfg(feature = "utf16")]
1630        {
1631            offset += ch.len_utf16();
1632        }
1633        #[cfg(not(feature = "utf16"))]
1634        {
1635            offset += ch.len_utf8();
1636        }
1637        self.position.set(Position {
1638            offset,
1639            line,
1640            column,
1641        });
1642    }
1643
1644    /// Bump the parser to the target offset.
1645    ///
1646    /// If target offset is beyond the end of the input, bump the parser to the end of the input.
1647    fn bump_to(&self, target_offset: usize) {
1648        assert!(
1649            self.offset() <= target_offset,
1650            "target_offset {} must be greater than the current offset {})",
1651            target_offset,
1652            self.offset()
1653        );
1654
1655        let target_offset = cmp::min(target_offset, self.message.len());
1656        loop {
1657            let offset = self.offset();
1658
1659            if self.offset() == target_offset {
1660                break;
1661            }
1662            assert!(
1663                offset < target_offset,
1664                "target_offset is at invalid unicode byte boundary: {}",
1665                target_offset
1666            );
1667
1668            self.bump();
1669            if self.is_eof() {
1670                break;
1671            }
1672        }
1673    }
1674
1675    /// If the substring starting at the current position of the parser has
1676    /// the given prefix, then bump the parser to the character immediately
1677    /// following the prefix and return true. Otherwise, don't bump the parser
1678    /// and return false.
1679    fn bump_if(&self, prefix: &str) -> bool {
1680        #[cfg(feature = "utf16")]
1681        let message = &self.message_utf16[self.offset()..].to_string();
1682        #[cfg(feature = "utf16")]
1683        let message = message.as_str();
1684
1685        #[cfg(not(feature = "utf16"))]
1686        let message = &self.message[self.offset()..];
1687
1688        if message.starts_with(prefix) {
1689            for _ in 0..prefix.chars().count() {
1690                self.bump();
1691            }
1692            true
1693        } else {
1694            false
1695        }
1696    }
1697
1698    /// Bump the parser until the pattern character is found and return `true`.
1699    /// Otherwise bump to the end of the file and return `false`.
1700    fn bump_until(&self, pattern: char) -> bool {
1701        let current_offset = self.offset();
1702        if let Some(delta) = self.message[current_offset..].find(pattern) {
1703            self.bump_to(current_offset + delta);
1704            true
1705        } else {
1706            self.bump_to(self.message.len());
1707            false
1708        }
1709    }
1710
1711    /// advance the parser through all whitespace to the next non-whitespace byte.
1712    fn bump_space(&self) {
1713        while !self.is_eof() && is_whitespace(self.char()) {
1714            self.bump();
1715        }
1716    }
1717
1718    /// Peek at the *next* character in the input without advancing the parser.
1719    ///
1720    /// If the input has been exhausted, then this returns `None`.
1721    fn peek(&self) -> Option<char> {
1722        if self.is_eof() {
1723            return None;
1724        }
1725        self.message[self.offset() + self.char().len_utf8()..]
1726            .chars()
1727            .next()
1728    }
1729
1730    /// Returns true if the next call to `bump` would return false.
1731    fn is_eof(&self) -> bool {
1732        #[cfg(feature = "utf16")]
1733        return self.offset() == self.message_utf16.len();
1734
1735        #[cfg(not(feature = "utf16"))]
1736        return self.offset() == self.message.len();
1737    }
1738}
1739
1740fn parse_number_skeleton_from_string(
1741    skeleton: &str,
1742    span: Span,
1743    should_parse_skeleton: bool,
1744    should_capture_location: bool,
1745) -> std::result::Result<NumberSkeleton, ErrorKind> {
1746    if skeleton.is_empty() {
1747        return Err(ErrorKind::InvalidNumberSkeleton);
1748    }
1749    // Parse the skeleton
1750    let tokens: std::result::Result<Vec<_>, _> = skeleton
1751        .split(char::is_whitespace)
1752        .filter(|x| !x.is_empty())
1753        .map(|token| {
1754            let mut stem_and_options = token.split('/');
1755            if let Some(stem) = stem_and_options.next() {
1756                let options: std::result::Result<Vec<_>, _> = stem_and_options
1757                    .map(|option| {
1758                        // Token option cannot be empty
1759                        if option.is_empty() {
1760                            Err(ErrorKind::InvalidNumberSkeleton)
1761                        } else {
1762                            Ok(option)
1763                        }
1764                    })
1765                    .collect();
1766                Ok(NumberSkeletonToken {
1767                    stem,
1768                    options: options?,
1769                })
1770            } else {
1771                Err(ErrorKind::InvalidNumberSkeleton)
1772            }
1773        })
1774        .collect();
1775
1776    let tokens = tokens?;
1777    let parsed_options = if should_parse_skeleton {
1778        parse_number_skeleton(&tokens)
1779    } else {
1780        Default::default()
1781    };
1782
1783    Ok(NumberSkeleton {
1784        skeleton_type: SkeletonType::Number,
1785        tokens,
1786        // TODO: use trimmed end position
1787        location: if should_capture_location {
1788            Some(span)
1789        } else {
1790            None
1791        },
1792        parsed_options,
1793    })
1794}
1795
/// Report whether `ch` is one of the characters this parser accepts inside an
/// element (tag) name.
///
/// The accepted set is `-`, `.`, `_`, ASCII digits and letters, U+00B7, and
/// the non-ASCII ranges listed in the table below.
fn is_potential_element_name_char(ch: char) -> bool {
    // Inclusive `(low, high)` bounds; single characters use the same value twice.
    const ALLOWED: &[(char, char)] = &[
        ('-', '-'),
        ('.', '.'),
        ('0', '9'),
        ('_', '_'),
        ('a', 'z'),
        ('A', 'Z'),
        ('\u{B7}', '\u{B7}'),
        ('\u{C0}', '\u{D6}'),
        ('\u{D8}', '\u{F6}'),
        ('\u{F8}', '\u{37D}'),
        ('\u{37F}', '\u{1FFF}'),
        ('\u{200C}', '\u{200D}'),
        ('\u{203F}', '\u{2040}'),
        ('\u{2070}', '\u{218F}'),
        ('\u{2C00}', '\u{2FEF}'),
        ('\u{3001}', '\u{D7FF}'),
        ('\u{F900}', '\u{FDCF}'),
        ('\u{FDF0}', '\u{FFFD}'),
        ('\u{10000}', '\u{EFFFF}'),
    ];

    ALLOWED
        .iter()
        .any(|&(low, high)| (low..=high).contains(&ch))
}