use crate::date_time_pattern_generator::get_best_pattern;
use crate::error::{ErrorKind, Location, LocationDetails, ParserError};
use crate::regex_generated::SPACE_SEPARATOR_REGEX;
use crate::types::*;
use formatjs_icu_skeleton_parser::{parse_date_time_skeleton, parse_number_skeleton};
use icu::locale::Locale;
use once_cell::sync::Lazy;
use regex::Regex;
use indexmap::IndexMap;
use std::collections::HashSet;
/// A cursor into the message being parsed.
///
/// The parser tracks both a character-based offset (used for reported
/// locations) and a byte-based offset (used for slicing the UTF-8 message).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
    /// Zero-based count of characters consumed so far.
    pub offset: usize,
    /// Zero-based count of UTF-8 bytes consumed so far.
    pub byte_offset: usize,
    /// One-based line number.
    pub line: usize,
    /// One-based column number within the current line.
    pub column: usize,
}

impl Position {
    /// Creates the position of the very first character of a message:
    /// both offsets are `0`, line and column are `1`.
    pub fn new() -> Self {
        Self {
            line: 1,
            column: 1,
            offset: 0,
            byte_offset: 0,
        }
    }
}

impl Default for Position {
    /// Same as [`Position::new`].
    fn default() -> Self {
        Position::new()
    }
}
/// Options that control how [`Parser`] interprets a message.
///
/// All flags default to `false` and `locale` defaults to `None`.
#[derive(Debug, Clone, Default)]
pub struct ParserOptions {
/// When `true`, `<` is never treated as the start of a tag; it is kept as
/// literal text instead.
pub ignore_tag: bool,
/// When `true`, a plural/select argument without an `other` option is
/// rejected with `ErrorKind::MissingOtherClause`.
pub requires_other_clause: bool,
/// When `true`, number and date/time skeletons are additionally parsed into
/// format options; otherwise default options are stored.
pub should_parse_skeletons: bool,
/// When `true`, parsed elements carry source locations.
pub capture_location: bool,
/// Optional locale; when set, date/time skeletons are passed through
/// `get_best_pattern` with this locale before being stored.
pub locale: Option<Locale>,
}
/// Result alias used throughout the parser: the error type is always [`ParserError`].
pub type Result<T> = std::result::Result<T, ParserError>;
/// The formatter keyword that follows an argument name, e.g. the `plural`
/// in `{count, plural, ...}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArgType {
    Number,
    Date,
    Time,
    Select,
    Plural,
    SelectOrdinal,
    /// No keyword at all (the empty string).
    None,
}

impl ArgType {
    /// Maps a keyword to its [`ArgType`].
    ///
    /// The comparison is exact (case-sensitive). The empty string maps to
    /// [`ArgType::None`]; any unrecognised keyword yields `None`.
    fn from_str(s: &str) -> Option<Self> {
        const KEYWORDS: [(&str, ArgType); 7] = [
            ("number", ArgType::Number),
            ("date", ArgType::Date),
            ("time", ArgType::Time),
            ("select", ArgType::Select),
            ("plural", ArgType::Plural),
            ("selectordinal", ArgType::SelectOrdinal),
            ("", ArgType::None),
        ];
        KEYWORDS
            .iter()
            .find(|(keyword, _)| *keyword == s)
            .map(|&(_, arg_type)| arg_type)
    }
}
/// Matches a (possibly empty) run of space-separator characters anchored at
/// the start of the input.
///
/// Built from `SPACE_SEPARATOR_REGEX` by stripping any leading `^` / trailing
/// `$` from its source and wrapping it as `^<source>*`.
static SPACE_SEPARATOR_START_REGEX: Lazy<Regex> = Lazy::new(|| {
    let separator = SPACE_SEPARATOR_REGEX
        .as_str()
        .trim_start_matches('^')
        .trim_end_matches('$');
    let anchored = format!("^{}*", separator);
    Regex::new(&anchored).expect("Failed to compile SPACE_SEPARATOR_START_REGEX")
});
/// Matches a (possibly empty) run of space-separator characters anchored at
/// the end of the input.
///
/// Built from `SPACE_SEPARATOR_REGEX` by stripping any leading `^` / trailing
/// `$` from its source and wrapping it as `<source>*$`.
static SPACE_SEPARATOR_END_REGEX: Lazy<Regex> = Lazy::new(|| {
    let separator = SPACE_SEPARATOR_REGEX
        .as_str()
        .trim_start_matches('^')
        .trim_end_matches('$');
    let anchored = format!("{}*$", separator);
    Regex::new(&anchored).expect("Failed to compile SPACE_SEPARATOR_END_REGEX")
});
/// Literal text produced when a `<` is consumed as plain text rather than as a tag.
const LEFT_ANGLE_BRACKET: &str = "<";
/// Literal text produced for an escaped apostrophe (`''`).
const APOSTROPHE: &str = "'";
/// Extracts the longest run of identifier characters starting at `byte_index`.
///
/// Returns the matched slice together with its length in characters. When
/// `byte_index` is at or past the end of `s`, or the first character is not
/// an identifier character, the result is `("", 0)`.
///
/// # Panics
///
/// Panics if `byte_index` is inside `s` but not on a character boundary.
fn match_identifier_at_index(s: &str, byte_index: usize) -> (&str, usize) {
    if byte_index >= s.len() {
        return ("", 0);
    }
    let rest = &s[byte_index..];
    // The identifier ends at the first character that is not allowed in one,
    // or at the end of the string when every remaining character qualifies.
    let end_byte = rest
        .char_indices()
        .find(|&(_, c)| !is_identifier_char(c))
        .map_or(rest.len(), |(idx, _)| idx);
    let identifier = &rest[..end_byte];
    (identifier, identifier.chars().count())
}
/// Returns `true` for the code points of ASCII letters (`A`-`Z`, `a`-`z`).
#[inline]
fn is_alpha(codepoint: u32) -> bool {
    matches!(codepoint, 0x41..=0x5A | 0x61..=0x7A)
}
/// Returns `true` for an ASCII letter or `/` (code point 47), i.e. the
/// characters that may directly follow `<` in an opening or closing tag.
#[inline]
fn is_alpha_or_slash(codepoint: u32) -> bool {
    codepoint == u32::from(b'/') || is_alpha(codepoint)
}
/// Returns `true` if the code point may appear inside a tag name.
///
/// Accepts `-`, `.`, ASCII digits, `_`, ASCII letters, U+00B7 and the listed
/// non-ASCII ranges.
#[inline]
fn is_potential_element_name_char(c: u32) -> bool {
    matches!(
        c,
        45 | 46
            | 48..=57
            | 95
            | 97..=122
            | 65..=90
            | 0xb7
            | 0xc0..=0xd6
            | 0xd8..=0xf6
            | 0xf8..=0x37d
            | 0x37f..=0x1fff
            | 0x200c..=0x200d
            | 0x203f..=0x2040
            | 0x2070..=0x218f
            | 0x2c00..=0x2fef
            | 0x3001..=0xd7ff
            | 0xf900..=0xfdcf
            | 0xfdf0..=0xfffd
            | 0x10000..=0xeffff
    )
}
/// Returns `true` for the code points the parser treats as white space:
/// U+0009..=U+000D, U+0020, U+0085, U+200E, U+200F, U+2028 and U+2029.
#[inline]
fn is_white_space(c: u32) -> bool {
    matches!(
        c,
        0x0009..=0x000d | 0x0020 | 0x0085 | 0x200e..=0x200f | 0x2028 | 0x2029
    )
}
/// Returns `true` if `c` is one of the characters this parser classifies as
/// pattern syntax (punctuation and symbols that cannot be part of an
/// identifier).
#[inline]
fn is_pattern_syntax(c: char) -> bool {
    if c.is_ascii() {
        // ASCII punctuation, with the notable exception of `_`.
        return matches!(c, '!'..='/' | ':'..='@' | '['..='^' | '`' | '{'..='~');
    }
    matches!(
        c,
        '\u{00A1}'..='\u{00A7}'
            | '\u{00A9}'
            | '\u{00AB}'..='\u{00AC}'
            | '\u{00AE}'
            | '\u{00B0}'..='\u{00B1}'
            | '\u{00B6}'
            | '\u{00BB}'
            | '\u{00BF}'
            | '\u{00D7}'
            | '\u{00F7}'
            | '\u{2010}'..='\u{2027}'
            | '\u{2030}'..='\u{203E}'
            | '\u{2041}'..='\u{2053}'
            | '\u{2055}'..='\u{205E}'
            | '\u{2190}'..='\u{245F}'
            | '\u{2500}'..='\u{2775}'
            | '\u{2794}'..='\u{2BFF}'
            | '\u{2E00}'..='\u{2E7F}'
            | '\u{3001}'..='\u{3003}'
            | '\u{3008}'..='\u{3020}'
            | '\u{3030}'
            | '\u{FD3E}'..='\u{FD3F}'
            | '\u{FE45}'..='\u{FE46}'
    )
}
/// Returns `true` if `c` may appear in an identifier, i.e. it is neither
/// white space nor pattern syntax.
#[inline]
fn is_identifier_char(c: char) -> bool {
    if is_white_space(c as u32) {
        return false;
    }
    !is_pattern_syntax(c)
}
/// Returns a copy of `s` with the leading run of space-separator characters
/// (as matched by `SPACE_SEPARATOR_START_REGEX`) removed.
fn trim_start(s: &str) -> String {
    let trimmed = SPACE_SEPARATOR_START_REGEX.replace(s, "");
    trimmed.into_owned()
}
/// Returns a copy of `s` with the trailing run of space-separator characters
/// (as matched by `SPACE_SEPARATOR_END_REGEX`) removed.
fn trim_end(s: &str) -> String {
    let trimmed = SPACE_SEPARATOR_END_REGEX.replace(s, "");
    trimmed.into_owned()
}
/// Builds the [`Location`] spanning `start` to `end`.
///
/// Only the character offset, line and column of each position are copied;
/// the byte offset is not part of a location. The result is always `Some`.
fn create_location(start: Position, end: Position) -> Option<Location> {
    let details = |position: Position| LocationDetails {
        offset: position.offset,
        line: position.line,
        column: position.column,
    };
    Some(Location {
        start: details(start),
        end: details(end),
    })
}
/// Single-use recursive-descent parser for one message string.
///
/// Construct it with [`Parser::new`] and consume it with [`Parser::parse`].
pub struct Parser {
// The full message text being parsed.
message: String,
// Current cursor; advanced one character at a time by `bump`.
position: Position,
// Optional locale used when resolving date/time skeletons.
locale: Option<Locale>,
// The remaining fields are copied from `ParserOptions`.
ignore_tag: bool,
requires_other_clause: bool,
should_parse_skeletons: bool,
capture_location: bool,
}
impl Parser {
pub fn new(message: impl Into<String>, options: ParserOptions) -> Self {
Parser {
message: message.into(),
position: Position::new(),
locale: options.locale,
ignore_tag: options.ignore_tag,
requires_other_clause: options.requires_other_clause,
should_parse_skeletons: options.should_parse_skeletons,
capture_location: options.capture_location,
}
}
/// Parses the whole message and returns its top-level elements.
///
/// Consumes the parser, so it can only be run once.
///
/// # Errors
///
/// Returns a [`ParserError`] describing the first syntax error found.
///
/// # Panics
///
/// Panics if the parser has already advanced past the start of the message.
pub fn parse(mut self) -> Result<Vec<MessageFormatElement>> {
    assert!(self.offset() == 0, "parser can only be used once");
    self.parse_message(0, ArgType::None, false)
}
#[inline]
fn byte_offset(&self) -> usize {
self.position.byte_offset
}
#[inline]
fn offset(&self) -> usize {
self.position.offset
}
/// Returns `true` once the cursor has reached the end of the message.
#[inline]
fn is_eof(&self) -> bool {
    self.message.len() <= self.byte_offset()
}
/// Returns the code point of the character under the cursor.
///
/// # Panics
///
/// Panics with `"out of bound"` when the cursor is at the end of the
/// message, and panics if the byte offset is not on a character boundary.
fn char(&self) -> u32 {
    let at = self.position.byte_offset;
    assert!(at < self.message.len(), "out of bound");
    self.message[at..]
        .chars()
        .next()
        .map(u32::from)
        .expect("Offset is at invalid UTF-8 boundary")
}
#[inline]
fn clone_position(&self) -> Position {
self.position
}
fn error<T>(&self, kind: ErrorKind, location: Option<Location>) -> Result<T> {
let loc = location.unwrap_or_else(|| {
let pos = self.clone_position();
Location {
start: LocationDetails {
offset: pos.offset,
line: pos.line,
column: pos.column,
},
end: LocationDetails {
offset: pos.offset,
line: pos.line,
column: pos.column,
},
}
});
Err(ParserError {
kind,
message: self.message.clone(),
location: loc,
})
}
/// Advances the cursor by one character; does nothing at end of input.
///
/// A line feed (code point 10) moves to column 1 of the next line; any
/// other character advances the column by one. The character offset grows
/// by one and the byte offset by the character's UTF-8 length.
fn bump(&mut self) {
    if self.is_eof() {
        return;
    }
    let ch = std::char::from_u32(self.char()).unwrap();
    if ch == '\n' {
        self.position.line += 1;
        self.position.column = 1;
    } else {
        self.position.column += 1;
    }
    self.position.offset += 1;
    self.position.byte_offset += ch.len_utf8();
}
/// Consumes `prefix` if the remaining input starts with it.
///
/// Returns `true` (and advances past the prefix) on a match, otherwise
/// returns `false` and leaves the cursor untouched.
fn bump_if(&mut self, prefix: &str) -> bool {
    let matched = self.message[self.byte_offset()..].starts_with(prefix);
    if matched {
        // Advance one character at a time so line/column stay accurate.
        prefix.chars().for_each(|_| self.bump());
    }
    matched
}
/// Advances the cursor to the next occurrence of `pattern`.
///
/// Returns `true` with the cursor placed on the first character of the
/// match, or `false` with the cursor at end of input when `pattern` does
/// not occur in the remaining text.
///
/// The search runs on the remaining text starting at the *byte* offset.
/// `str::find` reports a byte index, so it is converted to a character
/// count before calling `bump_to`, which works in character offsets.
/// Slicing by the character offset (as done previously) is wrong for
/// messages that contain multi-byte characters.
fn bump_until(&mut self, pattern: &str) -> bool {
    let start_byte = self.byte_offset();
    let found = self.message[start_byte..].find(pattern);
    match found {
        Some(relative_byte) => {
            let chars_to_skip = self.message[start_byte..start_byte + relative_byte]
                .chars()
                .count();
            let target_offset = self.offset() + chars_to_skip;
            self.bump_to(target_offset);
            true
        }
        None => {
            // No match: consume everything that is left.
            while !self.is_eof() {
                self.bump();
            }
            false
        }
    }
}
/// Advances the cursor until its character offset equals `target_offset`
/// or the end of input is reached.
///
/// NOTE(review): the target is clamped against `message.len()`, which is a
/// byte length while `target_offset` is compared with the character offset;
/// confirm this mix of units is intended.
///
/// # Panics
///
/// Panics if `target_offset` lies before the current offset, or if the
/// cursor could not land exactly on a target that is smaller than the
/// message's byte length.
fn bump_to(&mut self, target_offset: usize) {
    assert!(
        self.offset() <= target_offset,
        "targetOffset {} must be greater than or equal to the current offset {}",
        target_offset,
        self.offset()
    );
    let clamped = target_offset.min(self.message.len());
    while self.offset() < clamped {
        self.bump();
        if self.is_eof() {
            break;
        }
    }
    assert!(
        self.offset() == clamped || clamped >= self.message.len(),
        "targetOffset {} is at invalid UTF-8 boundary",
        clamped
    );
}
/// Skips over any white-space characters under the cursor.
fn bump_space(&mut self) {
    loop {
        if self.is_eof() || !is_white_space(self.char()) {
            return;
        }
        self.bump();
    }
}
/// Returns the code point of the character *after* the one under the
/// cursor, without advancing.
///
/// Yields `None` at end of input or when the current character is the
/// last one.
fn peek(&self) -> Option<u32> {
    if self.is_eof() {
        return None;
    }
    let mut upcoming = self.message[self.byte_offset()..].chars();
    // Skip the current character; the one after it is the answer.
    upcoming.next();
    upcoming.next().map(|c| c as u32)
}
/// Consumes the identifier under the cursor, if there is one.
///
/// Returns the identifier text (empty when no identifier character is
/// present, in which case the cursor does not move) and, when location
/// capture is enabled, the span it covers.
fn parse_identifier_if_possible(&mut self) -> (String, Option<Location>) {
    let starting_position = self.clone_position();
    let (identifier, char_count) = {
        let (text, count) =
            match_identifier_at_index(&self.message, starting_position.byte_offset);
        (text.to_string(), count)
    };
    self.bump_to(starting_position.offset + char_count);
    let location = self
        .capture_location
        .then(|| create_location(starting_position, self.clone_position()))
        .flatten();
    (identifier, location)
}
/// Consumes a `<` that should be treated as literal text.
///
/// A `<` is literal when tags are ignored, or when it is not followed by
/// an ASCII letter or `/`. Returns `Some("<")` after consuming it, `None`
/// otherwise.
fn try_parse_left_angle_bracket(&mut self) -> Option<String> {
    if self.is_eof() || self.char() != 60 {
        return None;
    }
    let is_literal = self.ignore_tag || !is_alpha_or_slash(self.peek().unwrap_or(0));
    if !is_literal {
        return None;
    }
    self.bump();
    Some(LEFT_ANGLE_BRACKET.to_string())
}
/// Tries to consume apostrophe-quoted text starting at the cursor.
///
/// * `''` yields a single apostrophe.
/// * `'` followed by `{`, `}`, `<` or `>` (or by `#` inside a plural or
///   selectordinal) starts a quoted run that lasts until the next unpaired
///   apostrophe or end of input; `''` inside the run stands for one
///   apostrophe.
/// * Anything else is not a quote: `None` is returned and nothing is consumed.
fn try_parse_quote(&mut self, parent_arg_type: ArgType) -> Option<String> {
    if self.is_eof() || self.char() != 39 {
        return None;
    }
    let pound_is_special = matches!(
        parent_arg_type,
        ArgType::Plural | ArgType::SelectOrdinal
    );
    match self.peek()? {
        39 => {
            // Escaped apostrophe: consume both and emit one.
            self.bump();
            self.bump();
            return Some(APOSTROPHE.to_string());
        }
        123 | 60 | 62 | 125 => {}
        35 if pound_is_special => {}
        _ => return None,
    }

    // Consume the opening apostrophe, then the character it escapes.
    self.bump();
    let mut text = String::new();
    text.push(std::char::from_u32(self.char()).unwrap());
    self.bump();

    while !self.is_eof() {
        let code = self.char();
        if code != 39 {
            text.push(std::char::from_u32(code).unwrap());
            self.bump();
            continue;
        }
        if self.peek() == Some(39) {
            // Doubled apostrophe inside the quoted run.
            text.push('\'');
            self.bump();
            self.bump();
            continue;
        }
        // Closing apostrophe.
        self.bump();
        break;
    }
    Some(text)
}
/// Consumes one plain-text character and appends it to `buffer`.
///
/// Returns `false` without consuming anything at end of input or when the
/// character is syntactically significant here: `<`, `{`, `#` inside a
/// plural/selectordinal, or `}` when nested.
fn try_parse_unquoted(
    &mut self,
    nesting_level: usize,
    parent_arg_type: ArgType,
    buffer: &mut String,
) -> bool {
    if self.is_eof() {
        return false;
    }
    let code = self.char();
    let ends_literal = match code {
        60 | 123 => true,
        35 => matches!(
            parent_arg_type,
            ArgType::Plural | ArgType::SelectOrdinal
        ),
        125 => nesting_level > 0,
        _ => false,
    };
    if ends_literal {
        return false;
    }
    self.bump();
    buffer.push(std::char::from_u32(code).unwrap());
    true
}
fn parse_literal(
&mut self,
nesting_level: usize,
parent_arg_type: ArgType,
) -> Result<LiteralElement> {
let start = self.clone_position();
let mut value = String::new();
loop {
if let Some(quoted) = self.try_parse_quote(parent_arg_type) {
value.push_str("ed);
continue;
}
if self.try_parse_unquoted(nesting_level, parent_arg_type, &mut value) {
continue;
}
if let Some(bracket) = self.try_parse_left_angle_bracket() {
value.push_str(&bracket);
continue;
}
break;
}
let location = if self.capture_location {
create_location(start, self.clone_position())
} else {
None
};
Ok(if let Some(loc) = location {
LiteralElement::with_location(value, loc)
} else {
LiteralElement::new(value)
})
}
/// Consumes a tag name and returns it.
///
/// The first character is consumed unconditionally (callers have already
/// checked it); the name then extends over every following character
/// accepted by `is_potential_element_name_char`.
fn parse_tag_name(&mut self) -> String {
    let name_start = self.byte_offset();
    self.bump();
    loop {
        if self.is_eof() || !is_potential_element_name_char(self.char()) {
            break;
        }
        self.bump();
    }
    self.message[name_start..self.byte_offset()].to_owned()
}
fn parse_tag(
&mut self,
nesting_level: usize,
parent_arg_type: ArgType,
) -> Result<MessageFormatElement> {
let start_position = self.clone_position();
self.bump();
let tag_name = self.parse_tag_name();
self.bump_space();
if self.bump_if("/>") {
let location = if self.capture_location {
create_location(start_position, self.clone_position())
} else {
None
};
let elem = if let Some(loc) = location {
LiteralElement::with_location(format!("<{}/>", tag_name), loc)
} else {
LiteralElement::new(format!("<{}/>", tag_name))
};
return Ok(MessageFormatElement::Literal(elem));
} else if self.bump_if(">") {
let children = self.parse_message(nesting_level + 1, parent_arg_type, true)?;
let end_tag_start_position = self.clone_position();
if self.bump_if("</") {
if self.is_eof() || !is_alpha(self.char()) {
let location = if self.capture_location {
create_location(end_tag_start_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::InvalidTag, location);
}
let closing_tag_name_start = self.clone_position();
let closing_tag_name = self.parse_tag_name();
if tag_name != closing_tag_name {
let location = if self.capture_location {
create_location(closing_tag_name_start, self.clone_position())
} else {
None
};
return self.error(ErrorKind::UnmatchedClosingTag, location);
}
self.bump_space();
if !self.bump_if(">") {
let location = if self.capture_location {
create_location(end_tag_start_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::InvalidTag, location);
}
let location = if self.capture_location {
create_location(start_position, self.clone_position())
} else {
None
};
return Ok(MessageFormatElement::Tag(TagElement {
value: tag_name,
children,
location,
}));
} else {
let location = if self.capture_location {
create_location(start_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::UnclosedTag, location);
}
} else {
let location = if self.capture_location {
create_location(start_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::InvalidTag, location);
}
}
fn try_parse_argument_close(&mut self, opening_brace_position: Position) -> Result<()> {
if self.is_eof() || self.char() != 125 {
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::ExpectArgumentClosingBrace, location);
}
self.bump(); Ok(())
}
/// Parses an optionally signed run of ASCII decimal digits as an `i32`.
///
/// # Errors
///
/// * `expect_number_error` - no digit follows the optional sign.
/// * `invalid_number_error` - the value does not fit in an `i32`.
///
/// Both errors span from the start of the number (including its sign) to
/// the cursor.
fn try_parse_decimal_integer(
    &mut self,
    expect_number_error: ErrorKind,
    invalid_number_error: ErrorKind,
) -> Result<i32> {
    let starting_position = self.clone_position();
    let sign: i64 = if self.bump_if("+") {
        1
    } else if self.bump_if("-") {
        -1
    } else {
        1
    };

    let mut has_digits = false;
    let mut magnitude: i64 = 0;
    while !self.is_eof() {
        let ch = self.char();
        if !(48..=57).contains(&ch) {
            break;
        }
        has_digits = true;
        // Saturate instead of overflowing on very long digit runs; a
        // saturated value is far outside the `i32` range and is rejected
        // by the range check below.
        magnitude = magnitude
            .saturating_mul(10)
            .saturating_add(i64::from(ch - 48));
        self.bump();
    }

    let location = if self.capture_location {
        create_location(starting_position, self.clone_position())
    } else {
        None
    };
    if !has_digits {
        return self.error(expect_number_error, location);
    }

    // `magnitude` is within `0..=i64::MAX`, so applying the sign cannot overflow.
    let decimal = magnitude * sign;
    if decimal < i64::from(i32::MIN) || decimal > i64::from(i32::MAX) {
        return self.error(invalid_number_error, location);
    }
    // Lossless: the range was checked just above.
    Ok(decimal as i32)
}
/// Consumes the style part of a simple argument (`number`, `date`, `time`)
/// and returns its raw text.
///
/// Scanning stops at the first `}` that is not balanced by an earlier `{`
/// (that brace is left unconsumed) or at end of input. Apostrophe-quoted
/// sections are skipped verbatim, so braces inside them do not count.
///
/// # Errors
///
/// Returns `UnclosedQuoteInArgumentStyle` when an opening apostrophe has no
/// matching closing apostrophe.
fn parse_simple_arg_style_if_possible(&mut self) -> Result<String> {
    let style_start_byte = self.byte_offset();
    let mut depth = 0;
    while !self.is_eof() {
        match self.char() {
            39 => {
                self.bump();
                let apostrophe_position = self.clone_position();
                if !self.bump_until("'") {
                    let location = self
                        .capture_location
                        .then(|| create_location(apostrophe_position, self.clone_position()))
                        .flatten();
                    return self.error(ErrorKind::UnclosedQuoteInArgumentStyle, location);
                }
                // Step over the closing apostrophe.
                self.bump();
            }
            123 => {
                depth += 1;
                self.bump();
            }
            125 if depth == 0 => break,
            125 => {
                depth -= 1;
                self.bump();
            }
            _ => self.bump(),
        }
    }
    Ok(self.message[style_start_byte..self.byte_offset()].to_string())
}
/// Parses a sequence of message elements until end of input or until a
/// terminator for the current context is reached.
///
/// Parsing stops (without consuming the terminator) at a `}` when
/// `nesting_level > 0`, and at a `</` when `expecting_close_tag` is set.
///
/// # Errors
///
/// Returns `UnmatchedClosingTag` for a `</` seen while no closing tag is
/// expected, and propagates any error from the element parsers.
fn parse_message(
    &mut self,
    nesting_level: usize,
    parent_arg_type: ArgType,
    expecting_close_tag: bool,
) -> Result<Vec<MessageFormatElement>> {
    let pound_is_special = matches!(
        parent_arg_type,
        ArgType::Plural | ArgType::SelectOrdinal
    );
    let mut elements = Vec::new();

    while !self.is_eof() {
        let ch = self.char();

        if ch == 123 {
            elements.push(self.parse_argument(nesting_level, expecting_close_tag)?);
            continue;
        }
        if ch == 125 && nesting_level > 0 {
            break;
        }
        if ch == 35 && pound_is_special {
            let position = self.clone_position();
            self.bump();
            let location = self
                .capture_location
                .then(|| create_location(position, self.clone_position()))
                .flatten();
            elements.push(MessageFormatElement::Pound(PoundElement { location }));
            continue;
        }
        if ch == 60 && !self.ignore_tag {
            let following = self.peek();
            if following == Some(47) {
                if expecting_close_tag {
                    break;
                }
                let location = self
                    .capture_location
                    .then(|| create_location(self.clone_position(), self.clone_position()))
                    .flatten();
                return self.error(ErrorKind::UnmatchedClosingTag, location);
            }
            if following.map_or(false, is_alpha) {
                elements.push(self.parse_tag(nesting_level, parent_arg_type)?);
                continue;
            }
        }

        let literal = self.parse_literal(nesting_level, parent_arg_type)?;
        elements.push(MessageFormatElement::Literal(literal));
    }
    Ok(elements)
}
fn parse_argument(
&mut self,
nesting_level: usize,
expecting_close_tag: bool,
) -> Result<MessageFormatElement> {
let opening_brace_position = self.clone_position();
self.bump(); self.bump_space();
if self.is_eof() {
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::ExpectArgumentClosingBrace, location);
}
if self.char() == 125 {
self.bump();
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::EmptyArgument, location);
}
let (value, _value_location) = self.parse_identifier_if_possible();
if value.is_empty() {
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::MalformedArgument, location);
}
self.bump_space();
if self.is_eof() {
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::ExpectArgumentClosingBrace, location);
}
match self.char() {
125 => {
self.bump();
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
let arg = if let Some(loc) = location {
ArgumentElement::with_location(value, loc)
} else {
ArgumentElement::new(value)
};
Ok(MessageFormatElement::Argument(arg))
}
44 => {
self.bump();
self.bump_space();
if self.is_eof() {
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
return self.error(ErrorKind::ExpectArgumentClosingBrace, location);
}
self.parse_argument_options(
nesting_level,
expecting_close_tag,
value,
opening_brace_position,
)
}
_ => {
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
self.error(ErrorKind::MalformedArgument, location)
}
}
}
fn parse_argument_options(
&mut self,
nesting_level: usize,
expecting_close_tag: bool,
value: String,
opening_brace_position: Position,
) -> Result<MessageFormatElement> {
let type_start_position = self.clone_position();
let (arg_type_str, _) = self.parse_identifier_if_possible();
let type_end_position = self.clone_position();
let arg_type = ArgType::from_str(&arg_type_str).ok_or_else(|| {
let location = if self.capture_location {
create_location(type_start_position, type_end_position)
} else {
None
};
let loc = location.unwrap_or_else(|| Location {
start: LocationDetails {
offset: type_start_position.offset,
line: type_start_position.line,
column: type_start_position.column,
},
end: LocationDetails {
offset: type_end_position.offset,
line: type_end_position.line,
column: type_end_position.column,
},
});
ParserError {
kind: if arg_type_str.is_empty() {
ErrorKind::ExpectArgumentType
} else {
ErrorKind::InvalidArgumentType
},
message: self.message.clone(),
location: loc,
}
})?;
match arg_type {
ArgType::Number | ArgType::Date | ArgType::Time => {
self.bump_space();
let style_and_location: Option<(String, Option<Location>)> = if self.bump_if(",") {
self.bump_space();
let style_start_position = self.clone_position();
let style = self.parse_simple_arg_style_if_possible()?;
let trimmed_style = trim_end(&style);
if trimmed_style.is_empty() {
let location = if self.capture_location {
create_location(self.clone_position(), self.clone_position())
} else {
None
};
return self.error(ErrorKind::ExpectArgumentStyle, location);
}
let style_location = if self.capture_location {
create_location(style_start_position, self.clone_position())
} else {
None
};
Some((trimmed_style, style_location))
} else {
None
};
self.try_parse_argument_close(opening_brace_position)?;
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
if let Some((style, style_location)) = style_and_location {
if style.starts_with("::") {
let skeleton = trim_start(&style[2..]);
if arg_type == ArgType::Number {
let tokens = NumberSkeletonToken::parse_from_string(&skeleton)
.map_err(|_| {
self.error::<()>(
ErrorKind::InvalidNumberSkeleton,
style_location.clone(),
)
.unwrap_err()
})?;
let parsed_options = if self.should_parse_skeletons {
parse_number_skeleton(&tokens).map_err(|_| {
self.error::<()>(
ErrorKind::InvalidNumberSkeleton,
style_location.clone(),
)
.unwrap_err()
})?
} else {
NumberFormatOptions::default()
};
let num_skeleton = NumberSkeleton {
tokens,
location: style_location,
parsed_options,
};
return Ok(MessageFormatElement::Number(NumberElement {
value,
style: Some(NumberSkeletonOrStyle::Skeleton(num_skeleton)),
location,
}));
} else {
if skeleton.is_empty() {
return self.error(ErrorKind::ExpectDateTimeSkeleton, location);
}
let date_time_pattern = if let Some(ref locale) = self.locale {
get_best_pattern(&skeleton, locale)
} else {
skeleton.clone()
};
let parsed_options = if self.should_parse_skeletons {
parse_date_time_skeleton(&date_time_pattern).unwrap_or_default()
} else {
DateTimeFormatOptions::default()
};
let dt_style = DateTimeSkeletonOrStyle::Skeleton(DateTimeSkeleton {
pattern: date_time_pattern,
location: style_location,
parsed_options,
});
if arg_type == ArgType::Date {
return Ok(MessageFormatElement::Date(DateElement {
value,
style: Some(dt_style),
location,
}));
} else {
return Ok(MessageFormatElement::Time(TimeElement {
value,
style: Some(dt_style),
location,
}));
}
}
}
let style = NumberSkeletonOrStyle::String(style);
match arg_type {
ArgType::Number => Ok(MessageFormatElement::Number(NumberElement {
value,
style: Some(style),
location,
})),
ArgType::Date => Ok(MessageFormatElement::Date(DateElement {
value,
style: Some(DateTimeSkeletonOrStyle::String(match style {
NumberSkeletonOrStyle::String(s) => s,
_ => unreachable!(),
})),
location,
})),
ArgType::Time => Ok(MessageFormatElement::Time(TimeElement {
value,
style: Some(DateTimeSkeletonOrStyle::String(match style {
NumberSkeletonOrStyle::String(s) => s,
_ => unreachable!(),
})),
location,
})),
_ => unreachable!(),
}
} else {
match arg_type {
ArgType::Number => Ok(MessageFormatElement::Number(NumberElement {
value,
style: None,
location,
})),
ArgType::Date => Ok(MessageFormatElement::Date(DateElement {
value,
style: None,
location,
})),
ArgType::Time => Ok(MessageFormatElement::Time(TimeElement {
value,
style: None,
location,
})),
_ => unreachable!(),
}
}
}
ArgType::Plural | ArgType::SelectOrdinal | ArgType::Select => {
let type_end_position = self.clone_position();
self.bump_space();
if !self.bump_if(",") {
let location = if self.capture_location {
create_location(type_end_position, type_end_position)
} else {
None
};
return self.error(ErrorKind::ExpectSelectArgumentOptions, location);
}
self.bump_space();
let mut plural_offset = 0;
let (mut identifier, mut identifier_location) = self.parse_identifier_if_possible();
if arg_type != ArgType::Select && identifier == "offset" {
if !self.bump_if(":") {
let location = if self.capture_location {
create_location(self.clone_position(), self.clone_position())
} else {
None
};
return self.error(ErrorKind::ExpectPluralArgumentOffsetValue, location);
}
self.bump_space();
plural_offset = self.try_parse_decimal_integer(
ErrorKind::ExpectPluralArgumentOffsetValue,
ErrorKind::InvalidPluralArgumentOffsetValue,
)?;
self.bump_space();
let (id, loc) = self.parse_identifier_if_possible();
identifier = id;
identifier_location = loc;
}
let options_vec = self.try_parse_plural_or_select_options(
nesting_level,
arg_type,
expecting_close_tag,
identifier,
identifier_location,
)?;
self.try_parse_argument_close(opening_brace_position)?;
let location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
if arg_type == ArgType::Select {
let options: IndexMap<String, PluralOrSelectOption> =
options_vec.into_iter().collect();
Ok(MessageFormatElement::Select(SelectElement {
value,
options,
location,
}))
} else {
let options: IndexMap<ValidPluralRule, PluralOrSelectOption> = options_vec
.into_iter()
.map(|(key, val)| (ValidPluralRule::from_str(&key), val))
.collect();
Ok(MessageFormatElement::Plural(PluralElement {
value,
options,
offset: plural_offset,
plural_type: if arg_type == ArgType::Plural {
PluralType::Cardinal
} else {
PluralType::Ordinal
},
location,
}))
}
}
ArgType::None => {
let location = if self.capture_location {
create_location(type_start_position, type_end_position)
} else {
None
};
self.error(ErrorKind::ExpectArgumentType, location)
}
}
}
fn try_parse_plural_or_select_options(
&mut self,
nesting_level: usize,
parent_arg_type: ArgType,
expect_close_tag: bool,
mut selector: String,
mut selector_location: Option<Location>,
) -> Result<Vec<(String, PluralOrSelectOption)>> {
let mut has_other_clause = false;
let mut options = Vec::new();
let mut parsed_selectors = HashSet::new();
loop {
if selector.is_empty() {
let start_position = self.clone_position();
if parent_arg_type != ArgType::Select && self.bump_if("=") {
let number = self.try_parse_decimal_integer(
ErrorKind::ExpectPluralArgumentSelector,
ErrorKind::InvalidPluralArgumentSelector,
)?;
selector_location = if self.capture_location {
create_location(start_position, self.clone_position())
} else {
None
};
selector = format!("={}", number);
} else {
break;
}
}
if parsed_selectors.contains(&selector) {
return self.error(
if parent_arg_type == ArgType::Select {
ErrorKind::DuplicateSelectArgumentSelector
} else {
ErrorKind::DuplicatePluralArgumentSelector
},
selector_location,
);
}
if selector == "other" {
has_other_clause = true;
}
self.bump_space();
let opening_brace_position = self.clone_position();
if !self.bump_if("{") {
let location = if self.capture_location {
create_location(self.clone_position(), self.clone_position())
} else {
None
};
return self.error(
if parent_arg_type == ArgType::Select {
ErrorKind::ExpectSelectArgumentSelectorFragment
} else {
ErrorKind::ExpectPluralArgumentSelectorFragment
},
location,
);
}
let fragment =
self.parse_message(nesting_level + 1, parent_arg_type, expect_close_tag)?;
self.try_parse_argument_close(opening_brace_position)?;
let option_location = if self.capture_location {
create_location(opening_brace_position, self.clone_position())
} else {
None
};
options.push((
selector.clone(),
PluralOrSelectOption {
value: fragment,
location: option_location,
},
));
parsed_selectors.insert(selector.clone());
self.bump_space();
let (next_selector, next_location) = self.parse_identifier_if_possible();
selector = next_selector;
selector_location = next_location;
}
if options.is_empty() {
let location = if self.capture_location {
create_location(self.clone_position(), self.clone_position())
} else {
None
};
return self.error(
if parent_arg_type == ArgType::Select {
ErrorKind::ExpectSelectArgumentSelector
} else {
ErrorKind::ExpectPluralArgumentSelector
},
location,
);
}
if self.requires_other_clause && !has_other_clause {
let location = if self.capture_location {
create_location(self.clone_position(), self.clone_position())
} else {
None
};
return self.error(ErrorKind::MissingOtherClause, location);
}
Ok(options)
}
}
// Unit tests for the character classifiers, the cursor primitives and a
// small end-to-end parse of non-ASCII text containing a tag.
#[cfg(test)]
mod tests {
use super::*;
// A default position starts at offset 0, line 1, column 1.
#[test]
fn test_position_default() {
let pos = Position::default();
assert_eq!(pos.offset, 0);
assert_eq!(pos.line, 1);
assert_eq!(pos.column, 1);
}
// Only ASCII letters count as alphabetic.
#[test]
fn test_is_alpha() {
assert!(is_alpha(b'a' as u32));
assert!(is_alpha(b'Z' as u32));
assert!(!is_alpha(b'0' as u32));
assert!(!is_alpha(b'_' as u32));
}
// Space, tab and newline are white space; letters are not.
#[test]
fn test_is_white_space() {
assert!(is_white_space(b' ' as u32));
assert!(is_white_space(b'\t' as u32));
assert!(is_white_space(b'\n' as u32));
assert!(!is_white_space(b'a' as u32));
}
// A freshly created parser stores the message and starts at offset 0.
#[test]
fn test_parser_creation() {
let parser = Parser::new("Hello world", ParserOptions::default());
assert_eq!(parser.message, "Hello world");
assert_eq!(parser.offset(), 0);
}
// `bump` advances one character at a time until end of input.
#[test]
fn test_parser_bump() {
let mut parser = Parser::new("abc", ParserOptions::default());
assert_eq!(parser.char(), b'a' as u32);
parser.bump();
assert_eq!(parser.char(), b'b' as u32);
assert_eq!(parser.offset(), 1);
parser.bump();
assert_eq!(parser.char(), b'c' as u32);
parser.bump();
assert!(parser.is_eof());
}
// `bump_if` consumes a matching prefix and leaves the cursor alone otherwise.
#[test]
fn test_parser_bump_if() {
let mut parser = Parser::new("hello world", ParserOptions::default());
assert!(parser.bump_if("hello"));
assert_eq!(parser.offset(), 5);
assert!(!parser.bump_if("goodbye"));
assert_eq!(parser.offset(), 5);
assert!(parser.bump_if(" world"));
assert!(parser.is_eof());
}
// `peek` looks one character ahead without moving the cursor.
#[test]
fn test_parser_peek() {
let parser = Parser::new("ab", ParserOptions::default());
assert_eq!(parser.peek(), Some(b'b' as u32));
assert_eq!(parser.offset(), 0); }
// Multi-byte (Devanagari) text around and inside a tag must parse into a
// literal followed by a tag element with a single literal child.
#[test]
fn test_parser_hindi_text_with_tags() {
let parser = Parser::new("ही किंमत <span>जास्त</span>", ParserOptions::default());
let result = parser.parse();
assert!(result.is_ok(), "Failed to parse Hindi text with tags: {:?}", result.err());
let elements = result.unwrap();
assert_eq!(elements.len(), 2, "Expected 2 elements (literal + tag)");
match &elements[0] {
MessageFormatElement::Literal(lit) => {
assert_eq!(lit.value, "ही किंमत ", "First element should be Hindi text");
}
_ => panic!("First element should be a literal"),
}
match &elements[1] {
MessageFormatElement::Tag(tag) => {
assert_eq!(tag.value, "span", "Tag name should be 'span'");
assert_eq!(tag.children.len(), 1, "Tag should have 1 child");
match &tag.children[0] {
MessageFormatElement::Literal(lit) => {
assert_eq!(lit.value, "जास्त", "Tag content should be Hindi text");
}
_ => panic!("Tag child should be a literal"),
}
}
_ => panic!("Second element should be a tag"),
}
}
}