1use std::collections::{BTreeMap, BTreeSet};
4use std::error::Error;
5use std::fmt::{Display, Formatter};
6
7use crate::plurals::get_plural_categories;
8use crate::po::{PoFile, PoItem};
9
/// Alias of [`String`].
///
/// NOTE(review): presumably meant for the style of `ago` arguments; nothing in the visible code
/// references this alias (`IcuNode::Ago` stores `Option<String>`) — confirm before relying on it.
pub type IcuAgoStyle = String;
12
/// Switches that control how the ICU parser reads a message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IcuParserOptions {
    /// When `true`, `<` never starts a tag and is kept as literal text.
    pub ignore_tag: bool,
    /// When `true`, every `plural`, `selectordinal` and `select` argument must contain an
    /// `other` option; otherwise parsing fails with "Missing 'other' clause".
    pub requires_other_clause: bool,
}

impl Default for IcuParserOptions {
    /// Tags are parsed and the `other` clause is mandatory.
    fn default() -> Self {
        let (ignore_tag, requires_other_clause) = (false, true);
        Self {
            ignore_tag,
            requires_other_clause,
        }
    }
}
30
/// Category of an ICU parse error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IcuErrorKind {
    /// The message text was rejected by the parser; currently the only kind produced.
    SyntaxError,
}
37
/// Distinguishes the two argument types that are stored as a plural node.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IcuPluralType {
    /// Produced for `{name, plural, ...}`.
    Cardinal,
    /// Produced for `{name, selectordinal, ...}`.
    Ordinal,
}
46
/// One option of a `plural` / `selectordinal` argument.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuPluralOption {
    /// Parsed content of the option's `{...}` body.
    pub value: Vec<IcuNode>,
}
53
/// One option of a `select` argument.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuSelectOption {
    /// Parsed content of the option's `{...}` body.
    pub value: Vec<IcuNode>,
}
60
/// A node of the parsed message tree.
///
/// For every argument-like variant, `value` is the argument name and `style` is the trimmed text
/// after the second comma (`None` when the argument has no style section).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IcuNode {
    /// Plain text with apostrophe quoting already resolved. Self-closing tags such as `<b/>` are
    /// also returned as literals.
    Literal {
        /// The literal text.
        value: String,
    },
    /// A simple placeholder, `{name}`.
    Argument {
        /// Argument name.
        value: String,
    },
    /// `{name, number}` or `{name, number, style}`.
    Number {
        /// Argument name.
        value: String,
        /// Optional style text.
        style: Option<String>,
    },
    /// `{name, date}` or `{name, date, style}`.
    Date {
        /// Argument name.
        value: String,
        /// Optional style text.
        style: Option<String>,
    },
    /// `{name, time}` or `{name, time, style}`.
    Time {
        /// Argument name.
        value: String,
        /// Optional style text.
        style: Option<String>,
    },
    /// `{name, list}` or `{name, list, style}`.
    List {
        /// Argument name.
        value: String,
        /// Optional style text.
        style: Option<String>,
    },
    /// `{name, duration}` or `{name, duration, style}`.
    Duration {
        /// Argument name.
        value: String,
        /// Optional style text.
        style: Option<String>,
    },
    /// `{name, ago}` or `{name, ago, style}`.
    Ago {
        /// Argument name.
        value: String,
        /// Optional style text.
        style: Option<String>,
    },
    /// `{name, name}` or `{name, name, style}`.
    Name {
        /// Argument name.
        value: String,
        /// Optional style text.
        style: Option<String>,
    },
    /// `{name, select, key {...} ...}`.
    Select {
        /// Argument name.
        value: String,
        /// Options keyed by their selector.
        options: BTreeMap<String, IcuSelectOption>,
    },
    /// `{name, plural, ...}` or `{name, selectordinal, ...}`.
    Plural {
        /// Argument name.
        value: String,
        /// Options keyed by selector: a keyword such as `one`, or `=N` for an exact match.
        options: BTreeMap<String, IcuPluralOption>,
        /// Value of the `offset:N` prefix, `0` when absent.
        offset: i32,
        /// Whether the argument was written as `plural` or `selectordinal`.
        plural_type: IcuPluralType,
    },
    /// A `#` found directly inside a `plural` / `selectordinal` option body.
    Pound,
    /// A paired tag, `<name>...</name>`.
    Tag {
        /// Tag name.
        value: String,
        /// Parsed content between the opening and closing tag.
        children: Vec<IcuNode>,
    },
}
151
/// Error returned when a message cannot be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuParseError {
    /// Error category.
    pub kind: IcuErrorKind,
    /// Human-readable description, e.g. `Expected '}'`.
    pub message: String,
    /// Byte offset into the message: either a position chosen by the failing rule (often the
    /// start of the enclosing argument or tag) or the parser position at the time of the error.
    pub offset: usize,
}
162
163impl Display for IcuParseError {
164 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
165 write!(
166 formatter,
167 "ICU syntax error at position {}: {}",
168 self.offset, self.message
169 )
170 }
171}
172
173impl Error for IcuParseError {}
174
/// Description of one variable referenced by a message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuVariable {
    /// Argument name.
    pub name: String,
    /// One of `argument`, `number`, `date`, `time`, `plural` or `select`.
    pub kind: String,
    /// Style text of a formatted argument, if it has one.
    pub style: Option<String>,
}
185
/// Outcome of validating a message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuValidationResult {
    /// `true` when the message parsed without error.
    pub valid: bool,
    /// Parse errors; empty when `valid` is `true`. The parser stops at the first error, so
    /// `validate_icu` puts at most one entry here.
    pub errors: Vec<IcuParseError>,
}
194
/// Difference between the variable sets of a source message and its translation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuVariableComparison {
    /// Variables present in the source but not in the translation.
    pub missing: Vec<String>,
    /// Variables present in the translation but not in the source.
    pub extra: Vec<String>,
    /// `true` when both `missing` and `extra` are empty.
    pub is_match: bool,
}
205
/// Settings for converting gettext plural entries into ICU plural messages.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GettextToIcuOptions {
    /// Locale handed to the plural-category lookup.
    pub locale: String,
    /// Name of the ICU argument that carries the count.
    pub plural_variable: String,
    /// When `true`, every `#` in a translation is replaced by `{plural_variable}`.
    pub expand_octothorpe: bool,
}

impl GettextToIcuOptions {
    /// Builds options for `locale` with the plural variable `count` and `#` expansion enabled.
    #[must_use]
    pub fn new(locale: impl Into<String>) -> Self {
        let locale = locale.into();
        let plural_variable = "count".to_owned();
        Self {
            locale,
            plural_variable,
            expand_octothorpe: true,
        }
    }
}
228
/// Kind of argument whose option body is currently being parsed; decides whether `#` is special.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ParentArgType {
    /// Inside an option of a `plural` argument.
    Plural,
    /// Inside an option of a `selectordinal` argument.
    SelectOrdinal,
    /// Top level, or inside a `select` option: `#` is ordinary text.
    None,
}
235
/// Recursive-descent parser over a borrowed message string.
pub struct IcuParser<'a> {
    // Byte offset of the next unread character in `msg`.
    pos: usize,
    // The message being parsed.
    msg: &'a str,
    // Copied from `IcuParserOptions::ignore_tag`.
    ignore_tag: bool,
    // Copied from `IcuParserOptions::requires_other_clause`.
    requires_other: bool,
}
243
244impl<'a> IcuParser<'a> {
245 #[must_use]
247 pub fn new(message: &'a str, options: IcuParserOptions) -> Self {
248 Self {
249 pos: 0,
250 msg: message,
251 ignore_tag: options.ignore_tag,
252 requires_other: options.requires_other_clause,
253 }
254 }
255
256 pub fn parse(mut self) -> Result<Vec<IcuNode>, IcuParseError> {
258 let result = self.parse_message(0, ParentArgType::None)?;
259 if self.pos < self.msg.len() {
260 return Err(self.error("Unexpected character", None));
261 }
262 Ok(result)
263 }
264
    /// Parses a run of message content into nodes until the input ends or a terminator is met.
    ///
    /// `depth` is the nesting level (0 for the top-level message); a `}` only ends the run when
    /// `depth > 0`. `parent_arg` says whether the run is the body of a `plural` /
    /// `selectordinal` option, in which case `#` becomes [`IcuNode::Pound`].
    ///
    /// The terminating `}` or `</` is left unconsumed so the caller can check it.
    fn parse_message(
        &mut self,
        depth: usize,
        parent_arg: ParentArgType,
    ) -> Result<Vec<IcuNode>, IcuParseError> {
        let mut nodes = Vec::new();
        let in_plural = matches!(
            parent_arg,
            ParentArgType::Plural | ParentArgType::SelectOrdinal
        );

        while self.pos < self.msg.len() {
            let ch = self.current_char();
            match ch {
                Some('{') => nodes.push(self.parse_argument(depth)?),
                // At the top level a stray `}` falls through to the literal branch instead.
                Some('}') if depth > 0 => break,
                Some('#') if in_plural => {
                    self.pos += 1;
                    nodes.push(IcuNode::Pound);
                }
                Some('<') if !self.ignore_tag => {
                    let next = self.peek_char(1);
                    if matches!(next, Some(c) if is_alpha(c) || c.is_ascii_digit()) {
                        // `<` followed by an ASCII letter or digit opens a tag.
                        nodes.push(self.parse_tag(depth, parent_arg)?);
                    } else if next == Some('/') {
                        // A closing tag ends this run; `parse_tag` validates it.
                        break;
                    } else {
                        // Any other `<` is ordinary text.
                        nodes.push(self.parse_literal(depth, in_plural));
                    }
                }
                _ => nodes.push(self.parse_literal(depth, in_plural)),
            }
        }

        Ok(nodes)
    }
301
302 fn parse_argument(&mut self, depth: usize) -> Result<IcuNode, IcuParseError> {
303 let start = self.pos;
304 self.pos += 1;
305 self.skip_whitespace();
306
307 if self.current_char() == Some('}') {
308 return Err(self.error("Empty argument", Some(start)));
309 }
310
311 let name = self.parse_identifier();
312 if name.is_empty() {
313 return Err(self.error("Expected argument name", Some(start)));
314 }
315
316 self.skip_whitespace();
317 if self.current_char() == Some('}') {
318 self.pos += 1;
319 return Ok(IcuNode::Argument { value: name });
320 }
321
322 if self.current_char() != Some(',') {
323 return Err(self.error("Expected ',' or '}'", Some(start)));
324 }
325 self.pos += 1;
326 self.skip_whitespace();
327
328 let arg_type = self.parse_identifier();
329 if arg_type.is_empty() {
330 return Err(self.error("Expected argument type", Some(start)));
331 }
332
333 let normalized_type = arg_type.to_lowercase();
334 match normalized_type.as_str() {
335 "number" | "date" | "time" | "list" | "duration" | "ago" | "name" => {
336 self.parse_formatted_arg(&normalized_type, name, start)
337 }
338 "plural" | "selectordinal" => self.parse_plural(&normalized_type, name, depth, start),
339 "select" => self.parse_select(name, depth, start),
340 _ => Err(self.error(&format!("Invalid argument type: {arg_type}"), Some(start))),
341 }
342 }
343
344 fn parse_formatted_arg(
345 &mut self,
346 arg_type: &str,
347 name: String,
348 start: usize,
349 ) -> Result<IcuNode, IcuParseError> {
350 self.skip_whitespace();
351 let mut style = None;
352
353 if self.current_char() == Some(',') {
354 self.pos += 1;
355 self.skip_whitespace();
356 let parsed_style = self.parse_style();
357 if parsed_style.is_empty() {
358 return Err(self.error("Expected style", Some(start)));
359 }
360 style = Some(parsed_style);
361 }
362
363 self.expect_char('}', Some(start))?;
364
365 Ok(match arg_type {
366 "number" => IcuNode::Number { value: name, style },
367 "date" => IcuNode::Date { value: name, style },
368 "time" => IcuNode::Time { value: name, style },
369 "list" => IcuNode::List { value: name, style },
370 "duration" => IcuNode::Duration { value: name, style },
371 "ago" => IcuNode::Ago { value: name, style },
372 "name" => IcuNode::Name { value: name, style },
373 _ => unreachable!(),
374 })
375 }
376
377 fn parse_plural(
378 &mut self,
379 arg_type: &str,
380 name: String,
381 depth: usize,
382 start: usize,
383 ) -> Result<IcuNode, IcuParseError> {
384 self.skip_whitespace();
385 self.expect_char(',', Some(start))?;
386 self.skip_whitespace();
387
388 let mut offset = 0;
389 if self.peek_identifier() == "offset" {
390 let _ = self.parse_identifier();
391 self.expect_char(':', Some(start))?;
392 self.skip_whitespace();
393 offset = self.parse_integer(Some(start))?;
394 self.skip_whitespace();
395 }
396
397 let parent = if arg_type == "plural" {
398 ParentArgType::Plural
399 } else {
400 ParentArgType::SelectOrdinal
401 };
402 let options = self.parse_plural_options(depth, parent)?;
403 self.expect_char('}', Some(start))?;
404
405 Ok(IcuNode::Plural {
406 value: name,
407 options,
408 offset,
409 plural_type: if arg_type == "plural" {
410 IcuPluralType::Cardinal
411 } else {
412 IcuPluralType::Ordinal
413 },
414 })
415 }
416
417 fn parse_select(
418 &mut self,
419 name: String,
420 depth: usize,
421 start: usize,
422 ) -> Result<IcuNode, IcuParseError> {
423 self.skip_whitespace();
424 self.expect_char(',', Some(start))?;
425 self.skip_whitespace();
426
427 let options = self.parse_select_options(depth)?;
428 self.expect_char('}', Some(start))?;
429
430 Ok(IcuNode::Select {
431 value: name,
432 options,
433 })
434 }
435
436 fn parse_plural_options(
437 &mut self,
438 depth: usize,
439 parent_arg: ParentArgType,
440 ) -> Result<BTreeMap<String, IcuPluralOption>, IcuParseError> {
441 let mut options = BTreeMap::new();
442 let mut seen = BTreeSet::new();
443
444 while self.pos < self.msg.len() && self.current_char() != Some('}') {
445 self.skip_whitespace();
446
447 let selector = if self.current_char() == Some('=') {
448 self.pos += 1;
449 format!("={}", self.parse_integer(None)?)
450 } else {
451 let selector = self.parse_identifier();
452 if selector.is_empty() {
453 break;
454 }
455 selector
456 };
457
458 if !seen.insert(selector.clone()) {
459 return Err(self.error(&format!("Duplicate selector: {selector}"), None));
460 }
461
462 self.skip_whitespace();
463 self.expect_char('{', None)?;
464 let value = self.parse_message(depth + 1, parent_arg)?;
465 self.expect_char('}', None)?;
466 options.insert(selector, IcuPluralOption { value });
467 self.skip_whitespace();
468 }
469
470 if options.is_empty() {
471 return Err(self.error("Expected at least one plural option", None));
472 }
473 if self.requires_other && !options.contains_key("other") {
474 return Err(self.error("Missing 'other' clause", None));
475 }
476
477 Ok(options)
478 }
479
480 fn parse_select_options(
481 &mut self,
482 depth: usize,
483 ) -> Result<BTreeMap<String, IcuSelectOption>, IcuParseError> {
484 let mut options = BTreeMap::new();
485 let mut seen = BTreeSet::new();
486
487 while self.pos < self.msg.len() && self.current_char() != Some('}') {
488 self.skip_whitespace();
489 let selector = self.parse_identifier();
490 if selector.is_empty() {
491 break;
492 }
493
494 if !seen.insert(selector.clone()) {
495 return Err(self.error(&format!("Duplicate selector: {selector}"), None));
496 }
497
498 self.skip_whitespace();
499 self.expect_char('{', None)?;
500 let value = self.parse_message(depth + 1, ParentArgType::None)?;
501 self.expect_char('}', None)?;
502 options.insert(selector, IcuSelectOption { value });
503 self.skip_whitespace();
504 }
505
506 if options.is_empty() {
507 return Err(self.error("Expected at least one select option", None));
508 }
509 if self.requires_other && !options.contains_key("other") {
510 return Err(self.error("Missing 'other' clause", None));
511 }
512
513 Ok(options)
514 }
515
516 fn parse_tag(
517 &mut self,
518 depth: usize,
519 parent_arg: ParentArgType,
520 ) -> Result<IcuNode, IcuParseError> {
521 let start = self.pos;
522 self.pos += 1;
523 let tag_name = self.parse_tag_name();
524 self.skip_whitespace();
525
526 if self.remaining().starts_with("/>") {
527 self.pos += 2;
528 return Ok(IcuNode::Literal {
529 value: format!("<{tag_name}/>"),
530 });
531 }
532
533 self.expect_char('>', Some(start))?;
534 let children = self.parse_message(depth + 1, parent_arg)?;
535
536 if !self.remaining().starts_with("</") {
537 return Err(self.error("Unclosed tag", Some(start)));
538 }
539 self.pos += 2;
540
541 let closing_name = self.parse_tag_name();
542 if closing_name != tag_name {
543 return Err(self.error(
544 &format!("Mismatched tag: expected </{tag_name}>, got </{closing_name}>"),
545 Some(start),
546 ));
547 }
548
549 self.skip_whitespace();
550 self.expect_char('>', Some(start))?;
551
552 Ok(IcuNode::Tag {
553 value: tag_name,
554 children,
555 })
556 }
557
    /// Collects literal text up to the next syntax character and returns it as a literal node.
    ///
    /// The run stops (without consuming) at `{`, at `}` when `depth > 0`, at `#` when
    /// `in_plural`, and — unless tags are ignored — at `<` followed by an ASCII letter, a digit
    /// or `/`.
    ///
    /// Apostrophe handling:
    /// * `''` yields a single `'`;
    /// * `'` followed by `{`, `}`, `<`, `>` (or `#` when `in_plural`) opens a quoted section
    ///   that is copied verbatim up to the next lone `'`; inside it `''` still yields `'`, and
    ///   an unterminated section runs to the end of the input;
    /// * any other `'` is copied as-is.
    fn parse_literal(&mut self, depth: usize, in_plural: bool) -> IcuNode {
        let mut value = String::new();

        while self.pos < self.msg.len() {
            let Some(ch) = self.current_char() else {
                break;
            };

            // Terminators: leave them for `parse_message` to dispatch on.
            if ch == '{' || (ch == '}' && depth > 0) {
                break;
            }
            if ch == '#' && in_plural {
                break;
            }
            if ch == '<' && !self.ignore_tag {
                let next = self.peek_char(1);
                if matches!(next, Some(c) if is_alpha(c) || c.is_ascii_digit()) || next == Some('/')
                {
                    break;
                }
            }

            if ch == '\'' {
                let next = self.peek_char(1);
                if next == Some('\'') {
                    // Doubled apostrophe: one literal apostrophe.
                    value.push('\'');
                    self.pos += 2;
                } else if matches!(next, Some('{') | Some('}') | Some('<') | Some('>'))
                    || (next == Some('#') && in_plural)
                {
                    // Quoted section: skip the opening apostrophe, copy until the closing one.
                    self.pos += 1;
                    while self.pos < self.msg.len() {
                        let Some(quoted) = self.current_char() else {
                            break;
                        };
                        if quoted == '\'' {
                            if self.peek_char(1) == Some('\'') {
                                value.push('\'');
                                self.pos += 2;
                            } else {
                                // Closing apostrophe: consume it and leave quoted mode.
                                self.pos += 1;
                                break;
                            }
                        } else {
                            value.push(quoted);
                            self.pos += quoted.len_utf8();
                        }
                    }
                } else {
                    // Apostrophe that does not quote anything: keep it.
                    value.push(ch);
                    self.pos += 1;
                }
            } else {
                value.push(ch);
                self.pos += ch.len_utf8();
            }
        }

        IcuNode::Literal { value }
    }
618
    /// Consumes the style section of a formatted argument and returns it trimmed.
    ///
    /// Scanning stops (without consuming) at the first `}` that is not matched by an earlier
    /// `{` within the style, or at the end of the input. Apostrophe-quoted sections are skipped
    /// as a unit so braces inside them are not counted. The returned text is the raw slice, so
    /// quotes are kept as written.
    fn parse_style(&mut self) -> String {
        let start = self.pos;
        let mut brace_depth = 0usize;

        while self.pos < self.msg.len() {
            let Some(ch) = self.current_char() else {
                break;
            };

            if ch == '\'' {
                // Skip to the closing apostrophe (or the end of the input if there is none).
                self.pos += 1;
                while self.pos < self.msg.len() && self.current_char() != Some('\'') {
                    self.pos += self.current_char().map_or(1, char::len_utf8);
                }
                if self.pos < self.msg.len() {
                    self.pos += 1;
                }
            } else if ch == '{' {
                brace_depth += 1;
                self.pos += 1;
            } else if ch == '}' {
                if brace_depth == 0 {
                    // This brace closes the argument itself; the caller consumes it.
                    break;
                }
                brace_depth -= 1;
                self.pos += 1;
            } else {
                self.pos += ch.len_utf8();
            }
        }

        self.msg[start..self.pos].trim().to_owned()
    }
652
653 fn parse_identifier(&mut self) -> String {
654 let start = self.pos;
655 while self.pos < self.msg.len() {
656 let Some(ch) = self.current_char() else {
657 break;
658 };
659 if !is_identifier_char(ch) {
660 break;
661 }
662 self.pos += ch.len_utf8();
663 }
664 self.msg[start..self.pos].to_owned()
665 }
666
667 fn parse_tag_name(&mut self) -> String {
668 let start = self.pos;
669 while self.pos < self.msg.len() {
670 let Some(ch) = self.current_char() else {
671 break;
672 };
673 if !is_tag_char(ch) {
674 break;
675 }
676 self.pos += ch.len_utf8();
677 }
678 self.msg[start..self.pos].to_owned()
679 }
680
681 fn parse_integer(&mut self, error_pos: Option<usize>) -> Result<i32, IcuParseError> {
682 let start = self.pos;
683 let mut sign = 1;
684
685 match self.current_char() {
686 Some('-') => {
687 sign = -1;
688 self.pos += 1;
689 }
690 Some('+') => self.pos += 1,
691 _ => {}
692 }
693
694 let digits_start = self.pos;
695 while self.pos < self.msg.len() && self.current_char().is_some_and(|ch| ch.is_ascii_digit())
696 {
697 self.pos += 1;
698 }
699
700 if self.pos == digits_start {
701 return Err(self.error("Expected integer", error_pos.or(Some(start))));
702 }
703
704 let number = self.msg[digits_start..self.pos]
705 .parse::<i32>()
706 .map_err(|_| self.error("Expected integer", error_pos.or(Some(start))))?;
707
708 Ok(sign * number)
709 }
710
711 fn skip_whitespace(&mut self) {
712 while self.pos < self.msg.len()
713 && self
714 .current_char()
715 .is_some_and(|ch| matches!(ch, ' ' | '\t' | '\n' | '\r'))
716 {
717 self.pos += self.current_char().map_or(1, char::len_utf8);
718 }
719 }
720
721 fn peek_identifier(&mut self) -> String {
722 let start = self.pos;
723 let identifier = self.parse_identifier();
724 self.pos = start;
725 identifier
726 }
727
728 fn expect_char(
729 &mut self,
730 expected: char,
731 error_pos: Option<usize>,
732 ) -> Result<(), IcuParseError> {
733 if self.current_char() != Some(expected) {
734 return Err(self.error(&format!("Expected '{expected}'"), error_pos));
735 }
736 self.pos += expected.len_utf8();
737 Ok(())
738 }
739
740 fn error(&self, message: &str, offset: Option<usize>) -> IcuParseError {
741 IcuParseError {
742 kind: IcuErrorKind::SyntaxError,
743 message: message.to_owned(),
744 offset: offset.unwrap_or(self.pos),
745 }
746 }
747
748 fn current_char(&self) -> Option<char> {
749 self.msg[self.pos..].chars().next()
750 }
751
752 fn peek_char(&self, ahead: usize) -> Option<char> {
753 self.msg[self.pos..].chars().nth(ahead)
754 }
755
756 fn remaining(&self) -> &str {
757 &self.msg[self.pos..]
758 }
759}
760
761pub fn parse_icu(message: &str, options: IcuParserOptions) -> Result<Vec<IcuNode>, IcuParseError> {
763 IcuParser::new(message, options).parse()
764}
765
766#[must_use]
768pub fn validate_icu(message: &str, options: IcuParserOptions) -> IcuValidationResult {
769 match parse_icu(message, options) {
770 Ok(_) => IcuValidationResult {
771 valid: true,
772 errors: Vec::new(),
773 },
774 Err(error) => IcuValidationResult {
775 valid: false,
776 errors: vec![error],
777 },
778 }
779}
780
781#[must_use]
783pub fn extract_variables(message: &str) -> Vec<String> {
784 parse_icu(
785 message,
786 IcuParserOptions {
787 requires_other_clause: false,
788 ..IcuParserOptions::default()
789 },
790 )
791 .map_or_else(|_| Vec::new(), |ast| extract_variables_from_ast(&ast))
792}
793
794#[must_use]
796pub fn extract_variable_info(message: &str) -> Vec<IcuVariable> {
797 parse_icu(
798 message,
799 IcuParserOptions {
800 requires_other_clause: false,
801 ..IcuParserOptions::default()
802 },
803 )
804 .map_or_else(|_| Vec::new(), |ast| extract_variable_info_from_ast(&ast))
805}
806
807#[must_use]
809pub fn compare_variables(source: &str, translation: &str) -> IcuVariableComparison {
810 let source_vars = extract_variables(source)
811 .into_iter()
812 .collect::<BTreeSet<_>>();
813 let translation_vars = extract_variables(translation)
814 .into_iter()
815 .collect::<BTreeSet<_>>();
816
817 let missing = source_vars
818 .difference(&translation_vars)
819 .cloned()
820 .collect::<Vec<_>>();
821 let extra = translation_vars
822 .difference(&source_vars)
823 .cloned()
824 .collect::<Vec<_>>();
825
826 IcuVariableComparison {
827 is_match: missing.is_empty() && extra.is_empty(),
828 missing,
829 extra,
830 }
831}
832
833#[must_use]
835pub fn has_plural(message: &str) -> bool {
836 parse_icu(
837 message,
838 IcuParserOptions {
839 requires_other_clause: false,
840 ..IcuParserOptions::default()
841 },
842 )
843 .is_ok_and(|ast| contains_node_type(&ast, |node| matches!(node, IcuNode::Plural { .. })))
844}
845
846#[must_use]
848pub fn has_select(message: &str) -> bool {
849 parse_icu(
850 message,
851 IcuParserOptions {
852 requires_other_clause: false,
853 ..IcuParserOptions::default()
854 },
855 )
856 .is_ok_and(|ast| contains_node_type(&ast, |node| matches!(node, IcuNode::Select { .. })))
857}
858
859#[must_use]
861pub fn has_select_ordinal(message: &str) -> bool {
862 parse_icu(
863 message,
864 IcuParserOptions {
865 requires_other_clause: false,
866 ..IcuParserOptions::default()
867 },
868 )
869 .is_ok_and(|ast| {
870 contains_node_type(&ast, |node| {
871 matches!(
872 node,
873 IcuNode::Plural {
874 plural_type: IcuPluralType::Ordinal,
875 ..
876 }
877 )
878 })
879 })
880}
881
882#[must_use]
884pub fn has_icu_syntax(message: &str) -> bool {
885 parse_icu(
886 message,
887 IcuParserOptions {
888 requires_other_clause: false,
889 ignore_tag: true,
890 },
891 )
892 .is_ok_and(|ast| {
893 ast.iter()
894 .any(|node| !matches!(node, IcuNode::Literal { .. }))
895 })
896}
897
898#[must_use]
900pub fn is_plural_item(item: &PoItem) -> bool {
901 item.msgid_plural.is_some() && item.msgstr.len() > 1
902}
903
904#[must_use]
906pub fn gettext_to_icu(item: &PoItem, options: &GettextToIcuOptions) -> Option<String> {
907 if !is_plural_item(item) {
908 return None;
909 }
910
911 let categories = get_plural_categories(&options.locale);
912 let clauses = item
913 .msgstr
914 .iter()
915 .enumerate()
916 .map(|(index, translation)| {
917 let category = categories.get(index).copied().unwrap_or("other");
918 let text = if options.expand_octothorpe {
919 translation.replace('#', &format!("{{{}}}", options.plural_variable))
920 } else {
921 translation.clone()
922 };
923 format!("{category} {{{text}}}")
924 })
925 .collect::<Vec<_>>()
926 .join(" ");
927
928 Some(format!(
929 "{{{}, plural, {clauses}}}",
930 options.plural_variable
931 ))
932}
933
934pub fn normalize_item_to_icu(item: &mut PoItem, options: &GettextToIcuOptions) -> bool {
936 match gettext_to_icu(item, options) {
937 Some(icu) => {
938 item.msgstr = vec![icu];
939 item.msgid_plural = Some(String::new());
940 true
941 }
942 None => false,
943 }
944}
945
946pub fn normalize_to_icu_in_place(po: &mut PoFile, options: &GettextToIcuOptions) {
948 for item in &mut po.items {
949 let _ = normalize_item_to_icu(item, options);
950 }
951}
952
953#[must_use]
955pub fn normalize_to_icu(po: &PoFile, options: &GettextToIcuOptions) -> PoFile {
956 let mut cloned = po.clone();
957 normalize_to_icu_in_place(&mut cloned, options);
958 cloned
959}
960
961#[must_use]
963pub fn icu_to_gettext_source(
964 icu: &str,
965 expand_octothorpe: bool,
966) -> Option<(String, String, String)> {
967 let ast = parse_icu(
968 icu,
969 IcuParserOptions {
970 requires_other_clause: false,
971 ..IcuParserOptions::default()
972 },
973 )
974 .ok()?;
975
976 let IcuNode::Plural { value, options, .. } = ast.first()? else {
977 return None;
978 };
979
980 if options.len() < 2 {
981 return None;
982 }
983
984 let singular = options
985 .get("one")
986 .or_else(|| options.values().next())
987 .map(flatten_option_text)?;
988 let plural = options
989 .get("other")
990 .or_else(|| options.values().last())
991 .map(flatten_option_text)?;
992
993 let expand = |text: String| {
994 if expand_octothorpe {
995 text.replace('#', &format!("{{{value}}}"))
996 } else {
997 text
998 }
999 };
1000
1001 Some((expand(singular), expand(plural), value.clone()))
1002}
1003
1004fn extract_variables_from_ast(nodes: &[IcuNode]) -> Vec<String> {
1005 let mut variables = BTreeSet::new();
1006 for_each_node(nodes, &mut |node| {
1007 if let Some(name) = node_variable_name(node) {
1008 variables.insert(name.to_owned());
1009 }
1010 });
1011 variables.into_iter().collect()
1012}
1013
1014fn extract_variable_info_from_ast(nodes: &[IcuNode]) -> Vec<IcuVariable> {
1015 let mut variables = Vec::new();
1016 let mut seen = BTreeSet::new();
1017
1018 for_each_node(nodes, &mut |node| {
1019 if let Some(variable) = node_to_variable(node) {
1020 if seen.insert(variable.name.clone()) {
1021 variables.push(variable);
1022 }
1023 }
1024 });
1025
1026 variables
1027}
1028
1029fn flatten_option_text<T>(option: &T) -> String
1030where
1031 T: OptionNodes,
1032{
1033 option
1034 .nodes()
1035 .iter()
1036 .map(flatten_node_text)
1037 .collect::<Vec<_>>()
1038 .join("")
1039}
1040
/// Reports whether `predicate` holds for any node in `nodes` or in their descendants.
///
/// Thin wrapper over [`some_node`].
fn contains_node_type(nodes: &[IcuNode], predicate: impl Fn(&IcuNode) -> bool + Copy) -> bool {
    some_node(nodes, predicate)
}
1044
1045fn some_node(nodes: &[IcuNode], predicate: impl Fn(&IcuNode) -> bool + Copy) -> bool {
1046 for node in nodes {
1047 if predicate(node) {
1048 return true;
1049 }
1050
1051 match node {
1052 IcuNode::Plural { options, .. } => {
1053 for child in plural_child_nodes(options) {
1054 if predicate(child) || some_node(std::slice::from_ref(child), predicate) {
1055 return true;
1056 }
1057 }
1058 }
1059 IcuNode::Select { options, .. } => {
1060 for child in select_child_nodes(options) {
1061 if predicate(child) || some_node(std::slice::from_ref(child), predicate) {
1062 return true;
1063 }
1064 }
1065 }
1066 IcuNode::Tag { children, .. } => {
1067 if some_node(children, predicate) {
1068 return true;
1069 }
1070 }
1071 IcuNode::Literal { .. }
1072 | IcuNode::Argument { .. }
1073 | IcuNode::Number { .. }
1074 | IcuNode::Date { .. }
1075 | IcuNode::Time { .. }
1076 | IcuNode::List { .. }
1077 | IcuNode::Duration { .. }
1078 | IcuNode::Ago { .. }
1079 | IcuNode::Name { .. }
1080 | IcuNode::Pound => {}
1081 }
1082 }
1083
1084 false
1085}
1086
1087fn for_each_node(nodes: &[IcuNode], callback: &mut dyn FnMut(&IcuNode)) {
1088 for node in nodes {
1089 callback(node);
1090 match node {
1091 IcuNode::Plural { options, .. } => {
1092 for option in options.values() {
1093 for_each_node(&option.value, callback);
1094 }
1095 }
1096 IcuNode::Select { options, .. } => {
1097 for option in options.values() {
1098 for_each_node(&option.value, callback);
1099 }
1100 }
1101 IcuNode::Tag { children, .. } => for_each_node(children, callback),
1102 IcuNode::Literal { .. }
1103 | IcuNode::Argument { .. }
1104 | IcuNode::Number { .. }
1105 | IcuNode::Date { .. }
1106 | IcuNode::Time { .. }
1107 | IcuNode::List { .. }
1108 | IcuNode::Duration { .. }
1109 | IcuNode::Ago { .. }
1110 | IcuNode::Name { .. }
1111 | IcuNode::Pound => {}
1112 }
1113 }
1114}
1115
1116fn plural_child_nodes<'a>(
1117 options: &'a BTreeMap<String, IcuPluralOption>,
1118) -> impl Iterator<Item = &'a IcuNode> + 'a {
1119 options.values().flat_map(|option| option.value.iter())
1120}
1121
1122fn select_child_nodes<'a>(
1123 options: &'a BTreeMap<String, IcuSelectOption>,
1124) -> impl Iterator<Item = &'a IcuNode> + 'a {
1125 options.values().flat_map(|option| option.value.iter())
1126}
1127
1128fn node_variable_name(node: &IcuNode) -> Option<&str> {
1129 match node {
1130 IcuNode::Argument { value }
1131 | IcuNode::Number { value, .. }
1132 | IcuNode::Date { value, .. }
1133 | IcuNode::Time { value, .. }
1134 | IcuNode::List { value, .. }
1135 | IcuNode::Duration { value, .. }
1136 | IcuNode::Ago { value, .. }
1137 | IcuNode::Name { value, .. }
1138 | IcuNode::Plural { value, .. }
1139 | IcuNode::Select { value, .. } => Some(value),
1140 IcuNode::Literal { .. } | IcuNode::Pound | IcuNode::Tag { .. } => None,
1141 }
1142}
1143
1144fn node_to_variable(node: &IcuNode) -> Option<IcuVariable> {
1145 match node {
1146 IcuNode::Argument { value } => Some(IcuVariable {
1147 name: value.clone(),
1148 kind: String::from("argument"),
1149 style: None,
1150 }),
1151 IcuNode::Number { value, style } => Some(IcuVariable {
1152 name: value.clone(),
1153 kind: String::from("number"),
1154 style: style.clone(),
1155 }),
1156 IcuNode::Date { value, style } => Some(IcuVariable {
1157 name: value.clone(),
1158 kind: String::from("date"),
1159 style: style.clone(),
1160 }),
1161 IcuNode::Time { value, style } => Some(IcuVariable {
1162 name: value.clone(),
1163 kind: String::from("time"),
1164 style: style.clone(),
1165 }),
1166 IcuNode::List { value, style }
1167 | IcuNode::Duration { value, style }
1168 | IcuNode::Ago { value, style }
1169 | IcuNode::Name { value, style } => Some(IcuVariable {
1170 name: value.clone(),
1171 kind: String::from("argument"),
1172 style: style.clone(),
1173 }),
1174 IcuNode::Plural { value, .. } => Some(IcuVariable {
1175 name: value.clone(),
1176 kind: String::from("plural"),
1177 style: None,
1178 }),
1179 IcuNode::Select { value, .. } => Some(IcuVariable {
1180 name: value.clone(),
1181 kind: String::from("select"),
1182 style: None,
1183 }),
1184 IcuNode::Literal { .. } | IcuNode::Pound | IcuNode::Tag { .. } => None,
1185 }
1186}
1187
/// Reports whether `ch` is an ASCII letter (`a`–`z` or `A`–`Z`).
fn is_alpha(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z')
}
1191
/// Reports whether `ch` may appear in an argument name, type keyword or selector: anything
/// except whitespace (space, tab, LF, CR) and the syntax characters `{ } # < > , :`.
fn is_identifier_char(ch: char) -> bool {
    const DELIMITERS: &str = " \t\n\r{}#<>,:";
    !DELIMITERS.contains(ch)
}
1198
/// Reports whether `ch` may appear in a tag name: ASCII letters and digits plus `-`, `.`, `:`
/// and `_`.
fn is_tag_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '.' | ':' | '_')
}
1202
1203fn flatten_node_text(node: &IcuNode) -> String {
1204 match node {
1205 IcuNode::Literal { value }
1206 | IcuNode::Argument { value }
1207 | IcuNode::Number { value, .. }
1208 | IcuNode::Date { value, .. }
1209 | IcuNode::Time { value, .. }
1210 | IcuNode::List { value, .. }
1211 | IcuNode::Duration { value, .. }
1212 | IcuNode::Ago { value, .. }
1213 | IcuNode::Name { value, .. } => value.clone(),
1214 IcuNode::Pound => String::from("#"),
1215 IcuNode::Tag { children, .. } => children.iter().map(flatten_node_text).collect(),
1216 IcuNode::Plural { .. } | IcuNode::Select { .. } => String::new(),
1217 }
1218}
1219
/// Uniform access to the body of a plural or select option.
trait OptionNodes {
    /// Returns the option's parsed body.
    fn nodes(&self) -> &[IcuNode];
}
1223
1224impl OptionNodes for IcuPluralOption {
1225 fn nodes(&self) -> &[IcuNode] {
1226 &self.value
1227 }
1228}
1229
1230impl OptionNodes for IcuSelectOption {
1231 fn nodes(&self) -> &[IcuNode] {
1232 &self.value
1233 }
1234}
1235
// Unit tests for the ICU parser, the inspection helpers and the gettext <-> ICU conversions.
#[cfg(test)]
mod tests {
    use super::{
        compare_variables, extract_variable_info, extract_variables, gettext_to_icu,
        has_icu_syntax, has_plural, has_select, has_select_ordinal, icu_to_gettext_source,
        is_plural_item, normalize_item_to_icu, normalize_to_icu, normalize_to_icu_in_place,
        parse_icu, validate_icu, GettextToIcuOptions, IcuNode, IcuParserOptions, IcuPluralType,
    };
    use crate::po::{PoFile, PoItem};

    // Plain text followed by a simple `{name}` placeholder.
    #[test]
    fn parse_icu_parses_literals_and_arguments() {
        let ast = parse_icu("Hello {name}", IcuParserOptions::default()).expect("should parse");
        assert_eq!(
            ast,
            vec![
                IcuNode::Literal {
                    value: String::from("Hello "),
                },
                IcuNode::Argument {
                    value: String::from("name"),
                },
            ]
        );
    }

    // A formatted argument keeps its trimmed style text.
    #[test]
    fn parse_icu_parses_formatted_arguments() {
        let ast = parse_icu("{price, number, currency}", IcuParserOptions::default())
            .expect("should parse");
        assert_eq!(
            ast[0],
            IcuNode::Number {
                value: String::from("price"),
                style: Some(String::from("currency")),
            }
        );
    }

    // `plural` yields a cardinal plural node with one entry per selector.
    #[test]
    fn parse_icu_parses_plural_and_pound_nodes() {
        let ast = parse_icu(
            "{count, plural, one {# item} other {# items}}",
            IcuParserOptions::default(),
        )
        .expect("should parse");

        match &ast[0] {
            IcuNode::Plural {
                value,
                options,
                plural_type,
                ..
            } => {
                assert_eq!(value, "count");
                assert_eq!(*plural_type, IcuPluralType::Cardinal);
                assert!(options.contains_key("one"));
                assert!(options.contains_key("other"));
            }
            other => panic!("expected plural node, got {other:?}"),
        }
    }

    // `select` with a tag nested inside one of its options.
    #[test]
    fn parse_icu_parses_select_and_tags() {
        let ast = parse_icu(
            "{gender, select, male {He} other {<b>They</b>}}",
            IcuParserOptions::default(),
        )
        .expect("should parse");

        match &ast[0] {
            IcuNode::Select { value, options } => {
                assert_eq!(value, "gender");
                assert!(options.contains_key("male"));
                assert!(options.contains_key("other"));
            }
            other => panic!("expected select node, got {other:?}"),
        }
    }

    // Quoted braces become literal text and `''` collapses to a single apostrophe.
    #[test]
    fn parse_icu_handles_quotes_and_escaped_apostrophes() {
        let ast = parse_icu(
            "This is a '{placeholder}' and it''s fine",
            IcuParserOptions::default(),
        )
        .expect("should parse");
        assert_eq!(
            ast,
            vec![IcuNode::Literal {
                value: String::from("This is a {placeholder} and it's fine"),
            }]
        );
    }

    // With `requires_other_clause` set, a plural without `other` is rejected.
    #[test]
    fn validate_icu_reports_missing_other_clause() {
        let result = validate_icu(
            "{n, plural, one {#}}",
            IcuParserOptions {
                requires_other_clause: true,
                ..IcuParserOptions::default()
            },
        );
        assert!(!result.valid);
        assert!(result.errors[0].message.contains("Missing 'other' clause"));
    }

    // Variable names come back sorted; the predicates reflect the node types present.
    #[test]
    fn extractors_and_predicates_work() {
        let message = "{name} has {count, plural, one {# item} other {# items}}";
        assert_eq!(
            extract_variables(message),
            vec![String::from("count"), String::from("name")]
        );
        assert_eq!(extract_variable_info(message).len(), 2);
        assert!(has_plural(message));
        assert!(!has_select(message));
        assert!(!has_select_ordinal(message));
        assert!(has_icu_syntax(message));
    }

    // A renamed placeholder shows up as both missing and extra.
    #[test]
    fn compare_variables_detects_missing_and_extra() {
        let comparison = compare_variables("Hello {name}", "Hallo {userName}");
        assert_eq!(comparison.missing, vec![String::from("name")]);
        assert_eq!(comparison.extra, vec![String::from("userName")]);
        assert!(!comparison.is_match);
    }

    // `selectordinal` is stored as a plural node marked ordinal.
    #[test]
    fn parse_icu_parses_selectordinal() {
        let ast = parse_icu(
            "{n, selectordinal, one {#st} two {#nd} other {#th}}",
            IcuParserOptions::default(),
        )
        .expect("should parse");

        match &ast[0] {
            IcuNode::Plural { plural_type, .. } => assert_eq!(*plural_type, IcuPluralType::Ordinal),
            other => panic!("expected plural node, got {other:?}"),
        }
    }

    // Builds a plural PO item whose `msgstr` holds the given forms.
    fn plural_item(msgstr: &[&str]) -> PoItem {
        let mut item = PoItem::new(2);
        item.msgid = String::from("One item");
        item.msgid_plural = Some(String::from("{count} items"));
        item.msgstr = msgstr.iter().map(|value| (*value).to_owned()).collect();
        item
    }

    // Two German forms map to the `one` and `other` categories.
    #[test]
    fn gettext_to_icu_converts_plural_forms() {
        let item = plural_item(&["Ein Artikel", "{count} Artikel"]);
        let result = gettext_to_icu(&item, &GettextToIcuOptions::new("de"));
        assert_eq!(
            result,
            Some(String::from(
                "{count, plural, one {Ein Artikel} other {{count} Artikel}}"
            ))
        );
    }

    // Four Polish forms map to `one`, `few`, `many` and `other`.
    #[test]
    fn gettext_to_icu_handles_multi_form_locales() {
        let item = plural_item(&["plik", "pliki", "plików", "pliki"]);
        let result = gettext_to_icu(&item, &GettextToIcuOptions::new("pl"));
        assert_eq!(
            result,
            Some(String::from(
                "{count, plural, one {plik} few {pliki} many {plików} other {pliki}}"
            ))
        );
    }

    // The copying variant leaves the input untouched; the in-place variant produces the same
    // result as the copy.
    #[test]
    fn normalize_helpers_convert_plural_items() {
        let mut item = plural_item(&["Ein Artikel", "{count} Artikel"]);
        assert!(is_plural_item(&item));
        assert!(normalize_item_to_icu(
            &mut item,
            &GettextToIcuOptions::new("de")
        ));
        assert_eq!(item.msgstr.len(), 1);

        let mut po = PoFile::new();
        po.items
            .push(plural_item(&["Ein Artikel", "{count} Artikel"]));
        let cloned = normalize_to_icu(&po, &GettextToIcuOptions::new("de"));
        assert_ne!(po.items[0].msgstr, cloned.items[0].msgstr);

        normalize_to_icu_in_place(&mut po, &GettextToIcuOptions::new("de"));
        assert_eq!(po.items[0].msgstr, cloned.items[0].msgstr);
    }

    // `#` is expanded to `{count}` in both the singular and the plural text.
    #[test]
    fn icu_to_gettext_source_extracts_singular_and_plural() {
        let source = icu_to_gettext_source("{count, plural, one {# item} other {# items}}", true);
        assert_eq!(
            source,
            Some((
                String::from("{count} item"),
                String::from("{count} items"),
                String::from("count")
            ))
        );
    }
}