Skip to main content

pofile/
icu.rs

1//! ICU MessageFormat parser and analysis helpers.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::error::Error;
5use std::fmt::{Display, Formatter};
6
7use crate::plurals::get_plural_categories;
8use crate::po::{PoFile, PoItem};
9
/// Style string for relative-time (`ago`) formatting.
///
/// This is a plain alias for [`String`]; it performs no validation of its own.
pub type IcuAgoStyle = String;
12
/// Options controlling how [`IcuParser`] interprets a message.
///
/// The [`Default`] implementation parses tags and requires an `other` clause.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IcuParserOptions {
    /// When `true`, `<` never starts a tag, so markup such as `<b>` is kept as literal text.
    pub ignore_tag: bool,
    /// When `true`, plural, selectordinal and select arguments must contain an `other` option.
    pub requires_other_clause: bool,
}
21
22impl Default for IcuParserOptions {
23    fn default() -> Self {
24        Self {
25            ignore_tag: false,
26            requires_other_clause: true,
27        }
28    }
29}
30
/// Category of an [`IcuParseError`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IcuErrorKind {
    /// The message is not well-formed ICU MessageFormat.
    SyntaxError,
}
37
/// Distinguishes the two argument types that share the [`IcuNode::Plural`] variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IcuPluralType {
    /// Cardinal plural, produced for the `plural` argument type.
    Cardinal,
    /// Ordinal plural, produced for the `selectordinal` argument type.
    Ordinal,
}
46
/// One branch of a plural or selectordinal expression.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuPluralOption {
    /// AST of the branch body (the content between the branch's braces).
    pub value: Vec<IcuNode>,
}
53
/// One branch of a select expression.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuSelectOption {
    /// AST of the branch body (the content between the branch's braces).
    pub value: Vec<IcuNode>,
}
60
/// A node of the ICU MessageFormat AST produced by [`IcuParser`].
///
/// For every argument-like variant, `value` holds the argument (variable) name. The `style`
/// of formatted arguments is kept as the raw, trimmed text after the second comma and is not
/// interpreted further by the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IcuNode {
    /// Literal text.
    Literal {
        /// Literal text content, with apostrophe quoting already resolved.
        value: String,
    },
    /// Simple argument, e.g. `{name}`.
    Argument {
        /// Variable name.
        value: String,
    },
    /// Number formatting, e.g. `{n, number}`.
    Number {
        /// Variable name.
        value: String,
        /// Opaque style string, `None` when no style was given.
        style: Option<String>,
    },
    /// Date formatting, e.g. `{d, date}`.
    Date {
        /// Variable name.
        value: String,
        /// Opaque style string, `None` when no style was given.
        style: Option<String>,
    },
    /// Time formatting, e.g. `{t, time}`.
    Time {
        /// Variable name.
        value: String,
        /// Opaque style string, `None` when no style was given.
        style: Option<String>,
    },
    /// List formatting, e.g. `{items, list}`.
    List {
        /// Variable name.
        value: String,
        /// Opaque style string, `None` when no style was given.
        style: Option<String>,
    },
    /// Duration formatting, e.g. `{d, duration}`.
    Duration {
        /// Variable name.
        value: String,
        /// Opaque style string, `None` when no style was given.
        style: Option<String>,
    },
    /// Relative-time formatting, e.g. `{t, ago}`.
    Ago {
        /// Variable name.
        value: String,
        /// Opaque style string, `None` when no style was given.
        style: Option<String>,
    },
    /// Display-name formatting, e.g. `{x, name}`.
    Name {
        /// Variable name.
        value: String,
        /// Opaque style string, `None` when no style was given.
        style: Option<String>,
    },
    /// Select expression, e.g. `{g, select, a {..} other {..}}`.
    Select {
        /// Variable name.
        value: String,
        /// Branches keyed by selector; iteration follows key order, not source order.
        options: BTreeMap<String, IcuSelectOption>,
    },
    /// Plural or selectordinal expression.
    Plural {
        /// Variable name.
        value: String,
        /// Branches keyed by selector (explicit numeric selectors are stored as `=N`);
        /// iteration follows key order, not source order.
        options: BTreeMap<String, IcuPluralOption>,
        /// Value of the `offset:` clause, `0` when the clause is absent.
        offset: i32,
        /// Whether this came from `plural` (cardinal) or `selectordinal` (ordinal).
        plural_type: IcuPluralType,
    },
    /// `#` inside a plural/selectordinal branch.
    Pound,
    /// XML-like tag node with an opening and a matching closing tag.
    Tag {
        /// Tag name.
        value: String,
        /// Child nodes between the opening and closing tag.
        children: Vec<IcuNode>,
    },
}
151
/// Error produced when a message cannot be parsed as ICU MessageFormat.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuParseError {
    /// Error kind.
    pub kind: IcuErrorKind,
    /// Human-readable error message.
    pub message: String,
    /// Byte offset into the source string at which the error is reported.
    pub offset: usize,
}
162
163impl Display for IcuParseError {
164    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
165        write!(
166            formatter,
167            "ICU syntax error at position {}: {}",
168            self.offset, self.message
169        )
170    }
171}
172
// Marker impl: lets `IcuParseError` be used wherever a `std::error::Error` is expected.
impl Error for IcuParseError {}
174
/// Variable descriptor extracted from an ICU message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuVariable {
    /// Variable name.
    pub name: String,
    /// Variable type, as a string.
    // NOTE(review): the exact set of values is decided by the extraction helper, which is
    // defined elsewhere in this file — confirm there before matching on specific strings.
    pub kind: String,
    /// Optional style hint.
    pub style: Option<String>,
}
185
/// Validation result for an ICU message, as returned by [`validate_icu`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuValidationResult {
    /// Whether parsing succeeded.
    pub valid: bool,
    /// Parse errors; empty when `valid` is `true`. [`validate_icu`] reports at most one error,
    /// because parsing stops at the first failure.
    pub errors: Vec<IcuParseError>,
}
194
/// Result of comparing the variables of a source message with those of its translation,
/// as returned by [`compare_variables`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IcuVariableComparison {
    /// Variables used by the source but missing in the translation.
    pub missing: Vec<String>,
    /// Variables used by the translation but absent from the source.
    pub extra: Vec<String>,
    /// `true` when both `missing` and `extra` are empty.
    pub is_match: bool,
}
205
/// Options for Gettext-to-ICU conversion.
///
/// Use [`GettextToIcuOptions::new`] to get the defaults for a given locale.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GettextToIcuOptions {
    /// Target locale used to map plural indices to categories.
    pub locale: String,
    /// Variable name used for the resulting ICU plural expression.
    pub plural_variable: String,
    /// When `true`, every `#` in a translation is replaced with `{plural_variable}`.
    pub expand_octothorpe: bool,
}
216
217impl GettextToIcuOptions {
218    /// Create conversion options for a locale.
219    #[must_use]
220    pub fn new(locale: impl Into<String>) -> Self {
221        Self {
222            locale: locale.into(),
223            plural_variable: String::from("count"),
224            expand_octothorpe: true,
225        }
226    }
227}
228
/// Kind of argument whose branch is currently being parsed.
///
/// The parser treats `#` as a pound node only when the parent is `Plural` or `SelectOrdinal`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ParentArgType {
    /// Inside a `plural` branch.
    Plural,
    /// Inside a `selectordinal` branch.
    SelectOrdinal,
    /// Top level, or inside a `select` branch.
    None,
}
235
/// Recursive-descent parser for ICU MessageFormat strings.
///
/// Create one with [`IcuParser::new`] and consume it with [`IcuParser::parse`].
pub struct IcuParser<'a> {
    /// Current byte offset into `msg`.
    pos: usize,
    /// Message being parsed.
    msg: &'a str,
    /// Copy of [`IcuParserOptions::ignore_tag`].
    ignore_tag: bool,
    /// Copy of [`IcuParserOptions::requires_other_clause`].
    requires_other: bool,
}
243
244impl<'a> IcuParser<'a> {
245    /// Create a new parser.
246    #[must_use]
247    pub fn new(message: &'a str, options: IcuParserOptions) -> Self {
248        Self {
249            pos: 0,
250            msg: message,
251            ignore_tag: options.ignore_tag,
252            requires_other: options.requires_other_clause,
253        }
254    }
255
256    /// Parse the full message.
257    pub fn parse(mut self) -> Result<Vec<IcuNode>, IcuParseError> {
258        let result = self.parse_message(0, ParentArgType::None)?;
259        if self.pos < self.msg.len() {
260            return Err(self.error("Unexpected character", None));
261        }
262        Ok(result)
263    }
264
265    fn parse_message(
266        &mut self,
267        depth: usize,
268        parent_arg: ParentArgType,
269    ) -> Result<Vec<IcuNode>, IcuParseError> {
270        let mut nodes = Vec::new();
271        let in_plural = matches!(
272            parent_arg,
273            ParentArgType::Plural | ParentArgType::SelectOrdinal
274        );
275
276        while self.pos < self.msg.len() {
277            let ch = self.current_char();
278            match ch {
279                Some('{') => nodes.push(self.parse_argument(depth)?),
280                Some('}') if depth > 0 => break,
281                Some('#') if in_plural => {
282                    self.pos += 1;
283                    nodes.push(IcuNode::Pound);
284                }
285                Some('<') if !self.ignore_tag => {
286                    let next = self.peek_char(1);
287                    if matches!(next, Some(c) if is_alpha(c) || c.is_ascii_digit()) {
288                        nodes.push(self.parse_tag(depth, parent_arg)?);
289                    } else if next == Some('/') {
290                        break;
291                    } else {
292                        nodes.push(self.parse_literal(depth, in_plural));
293                    }
294                }
295                _ => nodes.push(self.parse_literal(depth, in_plural)),
296            }
297        }
298
299        Ok(nodes)
300    }
301
302    fn parse_argument(&mut self, depth: usize) -> Result<IcuNode, IcuParseError> {
303        let start = self.pos;
304        self.pos += 1;
305        self.skip_whitespace();
306
307        if self.current_char() == Some('}') {
308            return Err(self.error("Empty argument", Some(start)));
309        }
310
311        let name = self.parse_identifier();
312        if name.is_empty() {
313            return Err(self.error("Expected argument name", Some(start)));
314        }
315
316        self.skip_whitespace();
317        if self.current_char() == Some('}') {
318            self.pos += 1;
319            return Ok(IcuNode::Argument { value: name });
320        }
321
322        if self.current_char() != Some(',') {
323            return Err(self.error("Expected ',' or '}'", Some(start)));
324        }
325        self.pos += 1;
326        self.skip_whitespace();
327
328        let arg_type = self.parse_identifier();
329        if arg_type.is_empty() {
330            return Err(self.error("Expected argument type", Some(start)));
331        }
332
333        let normalized_type = arg_type.to_lowercase();
334        match normalized_type.as_str() {
335            "number" | "date" | "time" | "list" | "duration" | "ago" | "name" => {
336                self.parse_formatted_arg(&normalized_type, name, start)
337            }
338            "plural" | "selectordinal" => self.parse_plural(&normalized_type, name, depth, start),
339            "select" => self.parse_select(name, depth, start),
340            _ => Err(self.error(&format!("Invalid argument type: {arg_type}"), Some(start))),
341        }
342    }
343
344    fn parse_formatted_arg(
345        &mut self,
346        arg_type: &str,
347        name: String,
348        start: usize,
349    ) -> Result<IcuNode, IcuParseError> {
350        self.skip_whitespace();
351        let mut style = None;
352
353        if self.current_char() == Some(',') {
354            self.pos += 1;
355            self.skip_whitespace();
356            let parsed_style = self.parse_style();
357            if parsed_style.is_empty() {
358                return Err(self.error("Expected style", Some(start)));
359            }
360            style = Some(parsed_style);
361        }
362
363        self.expect_char('}', Some(start))?;
364
365        Ok(match arg_type {
366            "number" => IcuNode::Number { value: name, style },
367            "date" => IcuNode::Date { value: name, style },
368            "time" => IcuNode::Time { value: name, style },
369            "list" => IcuNode::List { value: name, style },
370            "duration" => IcuNode::Duration { value: name, style },
371            "ago" => IcuNode::Ago { value: name, style },
372            "name" => IcuNode::Name { value: name, style },
373            _ => unreachable!(),
374        })
375    }
376
377    fn parse_plural(
378        &mut self,
379        arg_type: &str,
380        name: String,
381        depth: usize,
382        start: usize,
383    ) -> Result<IcuNode, IcuParseError> {
384        self.skip_whitespace();
385        self.expect_char(',', Some(start))?;
386        self.skip_whitespace();
387
388        let mut offset = 0;
389        if self.peek_identifier() == "offset" {
390            let _ = self.parse_identifier();
391            self.expect_char(':', Some(start))?;
392            self.skip_whitespace();
393            offset = self.parse_integer(Some(start))?;
394            self.skip_whitespace();
395        }
396
397        let parent = if arg_type == "plural" {
398            ParentArgType::Plural
399        } else {
400            ParentArgType::SelectOrdinal
401        };
402        let options = self.parse_plural_options(depth, parent)?;
403        self.expect_char('}', Some(start))?;
404
405        Ok(IcuNode::Plural {
406            value: name,
407            options,
408            offset,
409            plural_type: if arg_type == "plural" {
410                IcuPluralType::Cardinal
411            } else {
412                IcuPluralType::Ordinal
413            },
414        })
415    }
416
417    fn parse_select(
418        &mut self,
419        name: String,
420        depth: usize,
421        start: usize,
422    ) -> Result<IcuNode, IcuParseError> {
423        self.skip_whitespace();
424        self.expect_char(',', Some(start))?;
425        self.skip_whitespace();
426
427        let options = self.parse_select_options(depth)?;
428        self.expect_char('}', Some(start))?;
429
430        Ok(IcuNode::Select {
431            value: name,
432            options,
433        })
434    }
435
436    fn parse_plural_options(
437        &mut self,
438        depth: usize,
439        parent_arg: ParentArgType,
440    ) -> Result<BTreeMap<String, IcuPluralOption>, IcuParseError> {
441        let mut options = BTreeMap::new();
442        let mut seen = BTreeSet::new();
443
444        while self.pos < self.msg.len() && self.current_char() != Some('}') {
445            self.skip_whitespace();
446
447            let selector = if self.current_char() == Some('=') {
448                self.pos += 1;
449                format!("={}", self.parse_integer(None)?)
450            } else {
451                let selector = self.parse_identifier();
452                if selector.is_empty() {
453                    break;
454                }
455                selector
456            };
457
458            if !seen.insert(selector.clone()) {
459                return Err(self.error(&format!("Duplicate selector: {selector}"), None));
460            }
461
462            self.skip_whitespace();
463            self.expect_char('{', None)?;
464            let value = self.parse_message(depth + 1, parent_arg)?;
465            self.expect_char('}', None)?;
466            options.insert(selector, IcuPluralOption { value });
467            self.skip_whitespace();
468        }
469
470        if options.is_empty() {
471            return Err(self.error("Expected at least one plural option", None));
472        }
473        if self.requires_other && !options.contains_key("other") {
474            return Err(self.error("Missing 'other' clause", None));
475        }
476
477        Ok(options)
478    }
479
480    fn parse_select_options(
481        &mut self,
482        depth: usize,
483    ) -> Result<BTreeMap<String, IcuSelectOption>, IcuParseError> {
484        let mut options = BTreeMap::new();
485        let mut seen = BTreeSet::new();
486
487        while self.pos < self.msg.len() && self.current_char() != Some('}') {
488            self.skip_whitespace();
489            let selector = self.parse_identifier();
490            if selector.is_empty() {
491                break;
492            }
493
494            if !seen.insert(selector.clone()) {
495                return Err(self.error(&format!("Duplicate selector: {selector}"), None));
496            }
497
498            self.skip_whitespace();
499            self.expect_char('{', None)?;
500            let value = self.parse_message(depth + 1, ParentArgType::None)?;
501            self.expect_char('}', None)?;
502            options.insert(selector, IcuSelectOption { value });
503            self.skip_whitespace();
504        }
505
506        if options.is_empty() {
507            return Err(self.error("Expected at least one select option", None));
508        }
509        if self.requires_other && !options.contains_key("other") {
510            return Err(self.error("Missing 'other' clause", None));
511        }
512
513        Ok(options)
514    }
515
516    fn parse_tag(
517        &mut self,
518        depth: usize,
519        parent_arg: ParentArgType,
520    ) -> Result<IcuNode, IcuParseError> {
521        let start = self.pos;
522        self.pos += 1;
523        let tag_name = self.parse_tag_name();
524        self.skip_whitespace();
525
526        if self.remaining().starts_with("/>") {
527            self.pos += 2;
528            return Ok(IcuNode::Literal {
529                value: format!("<{tag_name}/>"),
530            });
531        }
532
533        self.expect_char('>', Some(start))?;
534        let children = self.parse_message(depth + 1, parent_arg)?;
535
536        if !self.remaining().starts_with("</") {
537            return Err(self.error("Unclosed tag", Some(start)));
538        }
539        self.pos += 2;
540
541        let closing_name = self.parse_tag_name();
542        if closing_name != tag_name {
543            return Err(self.error(
544                &format!("Mismatched tag: expected </{tag_name}>, got </{closing_name}>"),
545                Some(start),
546            ));
547        }
548
549        self.skip_whitespace();
550        self.expect_char('>', Some(start))?;
551
552        Ok(IcuNode::Tag {
553            value: tag_name,
554            children,
555        })
556    }
557
558    fn parse_literal(&mut self, depth: usize, in_plural: bool) -> IcuNode {
559        let mut value = String::new();
560
561        while self.pos < self.msg.len() {
562            let Some(ch) = self.current_char() else {
563                break;
564            };
565
566            if ch == '{' || (ch == '}' && depth > 0) {
567                break;
568            }
569            if ch == '#' && in_plural {
570                break;
571            }
572            if ch == '<' && !self.ignore_tag {
573                let next = self.peek_char(1);
574                if matches!(next, Some(c) if is_alpha(c) || c.is_ascii_digit()) || next == Some('/')
575                {
576                    break;
577                }
578            }
579
580            if ch == '\'' {
581                let next = self.peek_char(1);
582                if next == Some('\'') {
583                    value.push('\'');
584                    self.pos += 2;
585                } else if matches!(next, Some('{') | Some('}') | Some('<') | Some('>'))
586                    || (next == Some('#') && in_plural)
587                {
588                    self.pos += 1;
589                    while self.pos < self.msg.len() {
590                        let Some(quoted) = self.current_char() else {
591                            break;
592                        };
593                        if quoted == '\'' {
594                            if self.peek_char(1) == Some('\'') {
595                                value.push('\'');
596                                self.pos += 2;
597                            } else {
598                                self.pos += 1;
599                                break;
600                            }
601                        } else {
602                            value.push(quoted);
603                            self.pos += quoted.len_utf8();
604                        }
605                    }
606                } else {
607                    value.push(ch);
608                    self.pos += 1;
609                }
610            } else {
611                value.push(ch);
612                self.pos += ch.len_utf8();
613            }
614        }
615
616        IcuNode::Literal { value }
617    }
618
619    fn parse_style(&mut self) -> String {
620        let start = self.pos;
621        let mut brace_depth = 0usize;
622
623        while self.pos < self.msg.len() {
624            let Some(ch) = self.current_char() else {
625                break;
626            };
627
628            if ch == '\'' {
629                self.pos += 1;
630                while self.pos < self.msg.len() && self.current_char() != Some('\'') {
631                    self.pos += self.current_char().map_or(1, char::len_utf8);
632                }
633                if self.pos < self.msg.len() {
634                    self.pos += 1;
635                }
636            } else if ch == '{' {
637                brace_depth += 1;
638                self.pos += 1;
639            } else if ch == '}' {
640                if brace_depth == 0 {
641                    break;
642                }
643                brace_depth -= 1;
644                self.pos += 1;
645            } else {
646                self.pos += ch.len_utf8();
647            }
648        }
649
650        self.msg[start..self.pos].trim().to_owned()
651    }
652
653    fn parse_identifier(&mut self) -> String {
654        let start = self.pos;
655        while self.pos < self.msg.len() {
656            let Some(ch) = self.current_char() else {
657                break;
658            };
659            if !is_identifier_char(ch) {
660                break;
661            }
662            self.pos += ch.len_utf8();
663        }
664        self.msg[start..self.pos].to_owned()
665    }
666
667    fn parse_tag_name(&mut self) -> String {
668        let start = self.pos;
669        while self.pos < self.msg.len() {
670            let Some(ch) = self.current_char() else {
671                break;
672            };
673            if !is_tag_char(ch) {
674                break;
675            }
676            self.pos += ch.len_utf8();
677        }
678        self.msg[start..self.pos].to_owned()
679    }
680
681    fn parse_integer(&mut self, error_pos: Option<usize>) -> Result<i32, IcuParseError> {
682        let start = self.pos;
683        let mut sign = 1;
684
685        match self.current_char() {
686            Some('-') => {
687                sign = -1;
688                self.pos += 1;
689            }
690            Some('+') => self.pos += 1,
691            _ => {}
692        }
693
694        let digits_start = self.pos;
695        while self.pos < self.msg.len() && self.current_char().is_some_and(|ch| ch.is_ascii_digit())
696        {
697            self.pos += 1;
698        }
699
700        if self.pos == digits_start {
701            return Err(self.error("Expected integer", error_pos.or(Some(start))));
702        }
703
704        let number = self.msg[digits_start..self.pos]
705            .parse::<i32>()
706            .map_err(|_| self.error("Expected integer", error_pos.or(Some(start))))?;
707
708        Ok(sign * number)
709    }
710
711    fn skip_whitespace(&mut self) {
712        while self.pos < self.msg.len()
713            && self
714                .current_char()
715                .is_some_and(|ch| matches!(ch, ' ' | '\t' | '\n' | '\r'))
716        {
717            self.pos += self.current_char().map_or(1, char::len_utf8);
718        }
719    }
720
721    fn peek_identifier(&mut self) -> String {
722        let start = self.pos;
723        let identifier = self.parse_identifier();
724        self.pos = start;
725        identifier
726    }
727
728    fn expect_char(
729        &mut self,
730        expected: char,
731        error_pos: Option<usize>,
732    ) -> Result<(), IcuParseError> {
733        if self.current_char() != Some(expected) {
734            return Err(self.error(&format!("Expected '{expected}'"), error_pos));
735        }
736        self.pos += expected.len_utf8();
737        Ok(())
738    }
739
740    fn error(&self, message: &str, offset: Option<usize>) -> IcuParseError {
741        IcuParseError {
742            kind: IcuErrorKind::SyntaxError,
743            message: message.to_owned(),
744            offset: offset.unwrap_or(self.pos),
745        }
746    }
747
748    fn current_char(&self) -> Option<char> {
749        self.msg[self.pos..].chars().next()
750    }
751
752    fn peek_char(&self, ahead: usize) -> Option<char> {
753        self.msg[self.pos..].chars().nth(ahead)
754    }
755
756    fn remaining(&self) -> &str {
757        &self.msg[self.pos..]
758    }
759}
760
761/// Parse an ICU MessageFormat string.
762pub fn parse_icu(message: &str, options: IcuParserOptions) -> Result<Vec<IcuNode>, IcuParseError> {
763    IcuParser::new(message, options).parse()
764}
765
766/// Validate an ICU message.
767#[must_use]
768pub fn validate_icu(message: &str, options: IcuParserOptions) -> IcuValidationResult {
769    match parse_icu(message, options) {
770        Ok(_) => IcuValidationResult {
771            valid: true,
772            errors: Vec::new(),
773        },
774        Err(error) => IcuValidationResult {
775            valid: false,
776            errors: vec![error],
777        },
778    }
779}
780
781/// Extract variable names from an ICU message.
782#[must_use]
783pub fn extract_variables(message: &str) -> Vec<String> {
784    parse_icu(
785        message,
786        IcuParserOptions {
787            requires_other_clause: false,
788            ..IcuParserOptions::default()
789        },
790    )
791    .map_or_else(|_| Vec::new(), |ast| extract_variables_from_ast(&ast))
792}
793
794/// Extract variable details from an ICU message.
795#[must_use]
796pub fn extract_variable_info(message: &str) -> Vec<IcuVariable> {
797    parse_icu(
798        message,
799        IcuParserOptions {
800            requires_other_clause: false,
801            ..IcuParserOptions::default()
802        },
803    )
804    .map_or_else(|_| Vec::new(), |ast| extract_variable_info_from_ast(&ast))
805}
806
807/// Compare variable sets between source and translation.
808#[must_use]
809pub fn compare_variables(source: &str, translation: &str) -> IcuVariableComparison {
810    let source_vars = extract_variables(source)
811        .into_iter()
812        .collect::<BTreeSet<_>>();
813    let translation_vars = extract_variables(translation)
814        .into_iter()
815        .collect::<BTreeSet<_>>();
816
817    let missing = source_vars
818        .difference(&translation_vars)
819        .cloned()
820        .collect::<Vec<_>>();
821    let extra = translation_vars
822        .difference(&source_vars)
823        .cloned()
824        .collect::<Vec<_>>();
825
826    IcuVariableComparison {
827        is_match: missing.is_empty() && extra.is_empty(),
828        missing,
829        extra,
830    }
831}
832
833/// Check whether a message contains a plural node.
834#[must_use]
835pub fn has_plural(message: &str) -> bool {
836    parse_icu(
837        message,
838        IcuParserOptions {
839            requires_other_clause: false,
840            ..IcuParserOptions::default()
841        },
842    )
843    .is_ok_and(|ast| contains_node_type(&ast, |node| matches!(node, IcuNode::Plural { .. })))
844}
845
846/// Check whether a message contains a select node.
847#[must_use]
848pub fn has_select(message: &str) -> bool {
849    parse_icu(
850        message,
851        IcuParserOptions {
852            requires_other_clause: false,
853            ..IcuParserOptions::default()
854        },
855    )
856    .is_ok_and(|ast| contains_node_type(&ast, |node| matches!(node, IcuNode::Select { .. })))
857}
858
859/// Check whether a message contains a selectordinal node.
860#[must_use]
861pub fn has_select_ordinal(message: &str) -> bool {
862    parse_icu(
863        message,
864        IcuParserOptions {
865            requires_other_clause: false,
866            ..IcuParserOptions::default()
867        },
868    )
869    .is_ok_and(|ast| {
870        contains_node_type(&ast, |node| {
871            matches!(
872                node,
873                IcuNode::Plural {
874                    plural_type: IcuPluralType::Ordinal,
875                    ..
876                }
877            )
878        })
879    })
880}
881
882/// Check whether a message contains any ICU syntax.
883#[must_use]
884pub fn has_icu_syntax(message: &str) -> bool {
885    parse_icu(
886        message,
887        IcuParserOptions {
888            requires_other_clause: false,
889            ignore_tag: true,
890        },
891    )
892    .is_ok_and(|ast| {
893        ast.iter()
894            .any(|node| !matches!(node, IcuNode::Literal { .. }))
895    })
896}
897
898/// Check whether a PO item uses gettext plural forms.
899#[must_use]
900pub fn is_plural_item(item: &PoItem) -> bool {
901    item.msgid_plural.is_some() && item.msgstr.len() > 1
902}
903
904/// Convert a gettext plural item to ICU MessageFormat.
905#[must_use]
906pub fn gettext_to_icu(item: &PoItem, options: &GettextToIcuOptions) -> Option<String> {
907    if !is_plural_item(item) {
908        return None;
909    }
910
911    let categories = get_plural_categories(&options.locale);
912    let clauses = item
913        .msgstr
914        .iter()
915        .enumerate()
916        .map(|(index, translation)| {
917            let category = categories.get(index).copied().unwrap_or("other");
918            let text = if options.expand_octothorpe {
919                translation.replace('#', &format!("{{{}}}", options.plural_variable))
920            } else {
921                translation.clone()
922            };
923            format!("{category} {{{text}}}")
924        })
925        .collect::<Vec<_>>()
926        .join(" ");
927
928    Some(format!(
929        "{{{}, plural, {clauses}}}",
930        options.plural_variable
931    ))
932}
933
934/// Normalize a plural item to ICU format in place.
935pub fn normalize_item_to_icu(item: &mut PoItem, options: &GettextToIcuOptions) -> bool {
936    match gettext_to_icu(item, options) {
937        Some(icu) => {
938            item.msgstr = vec![icu];
939            item.msgid_plural = Some(String::new());
940            true
941        }
942        None => false,
943    }
944}
945
946/// Normalize all plural items in a PO file in place.
947pub fn normalize_to_icu_in_place(po: &mut PoFile, options: &GettextToIcuOptions) {
948    for item in &mut po.items {
949        let _ = normalize_item_to_icu(item, options);
950    }
951}
952
953/// Normalize all plural items in a PO file and return a cloned result.
954#[must_use]
955pub fn normalize_to_icu(po: &PoFile, options: &GettextToIcuOptions) -> PoFile {
956    let mut cloned = po.clone();
957    normalize_to_icu_in_place(&mut cloned, options);
958    cloned
959}
960
961/// Convert an ICU plural expression back to gettext-style source strings.
962#[must_use]
963pub fn icu_to_gettext_source(
964    icu: &str,
965    expand_octothorpe: bool,
966) -> Option<(String, String, String)> {
967    let ast = parse_icu(
968        icu,
969        IcuParserOptions {
970            requires_other_clause: false,
971            ..IcuParserOptions::default()
972        },
973    )
974    .ok()?;
975
976    let IcuNode::Plural { value, options, .. } = ast.first()? else {
977        return None;
978    };
979
980    if options.len() < 2 {
981        return None;
982    }
983
984    let singular = options
985        .get("one")
986        .or_else(|| options.values().next())
987        .map(flatten_option_text)?;
988    let plural = options
989        .get("other")
990        .or_else(|| options.values().last())
991        .map(flatten_option_text)?;
992
993    let expand = |text: String| {
994        if expand_octothorpe {
995            text.replace('#', &format!("{{{value}}}"))
996        } else {
997            text
998        }
999    };
1000
1001    Some((expand(singular), expand(plural), value.clone()))
1002}
1003
1004fn extract_variables_from_ast(nodes: &[IcuNode]) -> Vec<String> {
1005    let mut variables = BTreeSet::new();
1006    for_each_node(nodes, &mut |node| {
1007        if let Some(name) = node_variable_name(node) {
1008            variables.insert(name.to_owned());
1009        }
1010    });
1011    variables.into_iter().collect()
1012}
1013
1014fn extract_variable_info_from_ast(nodes: &[IcuNode]) -> Vec<IcuVariable> {
1015    let mut variables = Vec::new();
1016    let mut seen = BTreeSet::new();
1017
1018    for_each_node(nodes, &mut |node| {
1019        if let Some(variable) = node_to_variable(node) {
1020            if seen.insert(variable.name.clone()) {
1021                variables.push(variable);
1022            }
1023        }
1024    });
1025
1026    variables
1027}
1028
1029fn flatten_option_text<T>(option: &T) -> String
1030where
1031    T: OptionNodes,
1032{
1033    option
1034        .nodes()
1035        .iter()
1036        .map(flatten_node_text)
1037        .collect::<Vec<_>>()
1038        .join("")
1039}
1040
1041fn contains_node_type(nodes: &[IcuNode], predicate: impl Fn(&IcuNode) -> bool + Copy) -> bool {
1042    some_node(nodes, predicate)
1043}
1044
1045fn some_node(nodes: &[IcuNode], predicate: impl Fn(&IcuNode) -> bool + Copy) -> bool {
1046    for node in nodes {
1047        if predicate(node) {
1048            return true;
1049        }
1050
1051        match node {
1052            IcuNode::Plural { options, .. } => {
1053                for child in plural_child_nodes(options) {
1054                    if predicate(child) || some_node(std::slice::from_ref(child), predicate) {
1055                        return true;
1056                    }
1057                }
1058            }
1059            IcuNode::Select { options, .. } => {
1060                for child in select_child_nodes(options) {
1061                    if predicate(child) || some_node(std::slice::from_ref(child), predicate) {
1062                        return true;
1063                    }
1064                }
1065            }
1066            IcuNode::Tag { children, .. } => {
1067                if some_node(children, predicate) {
1068                    return true;
1069                }
1070            }
1071            IcuNode::Literal { .. }
1072            | IcuNode::Argument { .. }
1073            | IcuNode::Number { .. }
1074            | IcuNode::Date { .. }
1075            | IcuNode::Time { .. }
1076            | IcuNode::List { .. }
1077            | IcuNode::Duration { .. }
1078            | IcuNode::Ago { .. }
1079            | IcuNode::Name { .. }
1080            | IcuNode::Pound => {}
1081        }
1082    }
1083
1084    false
1085}
1086
1087fn for_each_node(nodes: &[IcuNode], callback: &mut dyn FnMut(&IcuNode)) {
1088    for node in nodes {
1089        callback(node);
1090        match node {
1091            IcuNode::Plural { options, .. } => {
1092                for option in options.values() {
1093                    for_each_node(&option.value, callback);
1094                }
1095            }
1096            IcuNode::Select { options, .. } => {
1097                for option in options.values() {
1098                    for_each_node(&option.value, callback);
1099                }
1100            }
1101            IcuNode::Tag { children, .. } => for_each_node(children, callback),
1102            IcuNode::Literal { .. }
1103            | IcuNode::Argument { .. }
1104            | IcuNode::Number { .. }
1105            | IcuNode::Date { .. }
1106            | IcuNode::Time { .. }
1107            | IcuNode::List { .. }
1108            | IcuNode::Duration { .. }
1109            | IcuNode::Ago { .. }
1110            | IcuNode::Name { .. }
1111            | IcuNode::Pound => {}
1112        }
1113    }
1114}
1115
1116fn plural_child_nodes<'a>(
1117    options: &'a BTreeMap<String, IcuPluralOption>,
1118) -> impl Iterator<Item = &'a IcuNode> + 'a {
1119    options.values().flat_map(|option| option.value.iter())
1120}
1121
1122fn select_child_nodes<'a>(
1123    options: &'a BTreeMap<String, IcuSelectOption>,
1124) -> impl Iterator<Item = &'a IcuNode> + 'a {
1125    options.values().flat_map(|option| option.value.iter())
1126}
1127
1128fn node_variable_name(node: &IcuNode) -> Option<&str> {
1129    match node {
1130        IcuNode::Argument { value }
1131        | IcuNode::Number { value, .. }
1132        | IcuNode::Date { value, .. }
1133        | IcuNode::Time { value, .. }
1134        | IcuNode::List { value, .. }
1135        | IcuNode::Duration { value, .. }
1136        | IcuNode::Ago { value, .. }
1137        | IcuNode::Name { value, .. }
1138        | IcuNode::Plural { value, .. }
1139        | IcuNode::Select { value, .. } => Some(value),
1140        IcuNode::Literal { .. } | IcuNode::Pound | IcuNode::Tag { .. } => None,
1141    }
1142}
1143
1144fn node_to_variable(node: &IcuNode) -> Option<IcuVariable> {
1145    match node {
1146        IcuNode::Argument { value } => Some(IcuVariable {
1147            name: value.clone(),
1148            kind: String::from("argument"),
1149            style: None,
1150        }),
1151        IcuNode::Number { value, style } => Some(IcuVariable {
1152            name: value.clone(),
1153            kind: String::from("number"),
1154            style: style.clone(),
1155        }),
1156        IcuNode::Date { value, style } => Some(IcuVariable {
1157            name: value.clone(),
1158            kind: String::from("date"),
1159            style: style.clone(),
1160        }),
1161        IcuNode::Time { value, style } => Some(IcuVariable {
1162            name: value.clone(),
1163            kind: String::from("time"),
1164            style: style.clone(),
1165        }),
1166        IcuNode::List { value, style }
1167        | IcuNode::Duration { value, style }
1168        | IcuNode::Ago { value, style }
1169        | IcuNode::Name { value, style } => Some(IcuVariable {
1170            name: value.clone(),
1171            kind: String::from("argument"),
1172            style: style.clone(),
1173        }),
1174        IcuNode::Plural { value, .. } => Some(IcuVariable {
1175            name: value.clone(),
1176            kind: String::from("plural"),
1177            style: None,
1178        }),
1179        IcuNode::Select { value, .. } => Some(IcuVariable {
1180            name: value.clone(),
1181            kind: String::from("select"),
1182            style: None,
1183        }),
1184        IcuNode::Literal { .. } | IcuNode::Pound | IcuNode::Tag { .. } => None,
1185    }
1186}
1187
/// Whether `ch` is an ASCII letter (`a`-`z` or `A`-`Z`).
fn is_alpha(ch: char) -> bool {
    char::is_ascii_alphabetic(&ch)
}
1191
/// Whether `ch` may appear inside an identifier.
///
/// Any character is accepted except space, tab, line feed, carriage return
/// and the characters `{`, `}`, `#`, `<`, `>`, `,` and `:`.
fn is_identifier_char(ch: char) -> bool {
    const TERMINATORS: [char; 11] = [' ', '\t', '\n', '\r', '{', '}', '#', '<', '>', ',', ':'];
    !TERMINATORS.contains(&ch)
}
1198
/// Whether `ch` may appear inside a tag name: ASCII letters and digits plus
/// `-`, `.`, `:` and `_`.
fn is_tag_char(ch: char) -> bool {
    match ch {
        '-' | '.' | ':' | '_' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
1202
1203fn flatten_node_text(node: &IcuNode) -> String {
1204    match node {
1205        IcuNode::Literal { value }
1206        | IcuNode::Argument { value }
1207        | IcuNode::Number { value, .. }
1208        | IcuNode::Date { value, .. }
1209        | IcuNode::Time { value, .. }
1210        | IcuNode::List { value, .. }
1211        | IcuNode::Duration { value, .. }
1212        | IcuNode::Ago { value, .. }
1213        | IcuNode::Name { value, .. } => value.clone(),
1214        IcuNode::Pound => String::from("#"),
1215        IcuNode::Tag { children, .. } => children.iter().map(flatten_node_text).collect(),
1216        IcuNode::Plural { .. } | IcuNode::Select { .. } => String::new(),
1217    }
1218}
1219
1220trait OptionNodes {
1221    fn nodes(&self) -> &[IcuNode];
1222}
1223
1224impl OptionNodes for IcuPluralOption {
1225    fn nodes(&self) -> &[IcuNode] {
1226        &self.value
1227    }
1228}
1229
1230impl OptionNodes for IcuSelectOption {
1231    fn nodes(&self) -> &[IcuNode] {
1232        &self.value
1233    }
1234}
1235
#[cfg(test)]
mod tests {
    use super::{
        compare_variables, extract_variable_info, extract_variables, gettext_to_icu,
        has_icu_syntax, has_plural, has_select, has_select_ordinal, icu_to_gettext_source,
        is_plural_item, normalize_item_to_icu, normalize_to_icu, normalize_to_icu_in_place,
        parse_icu, validate_icu, GettextToIcuOptions, IcuNode, IcuParserOptions, IcuPluralType,
    };
    use crate::po::{PoFile, PoItem};

    // --- Parsing -----------------------------------------------------------

    #[test]
    fn parse_icu_parses_literals_and_arguments() {
        let expected = vec![
            IcuNode::Literal {
                value: "Hello ".to_owned(),
            },
            IcuNode::Argument {
                value: "name".to_owned(),
            },
        ];

        let parsed = parse_icu("Hello {name}", IcuParserOptions::default()).expect("should parse");

        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_icu_parses_formatted_arguments() {
        let expected = IcuNode::Number {
            value: "price".to_owned(),
            style: Some("currency".to_owned()),
        };

        let parsed = parse_icu("{price, number, currency}", IcuParserOptions::default())
            .expect("should parse");

        assert_eq!(parsed[0], expected);
    }

    #[test]
    fn parse_icu_parses_plural_and_pound_nodes() {
        let message = "{count, plural, one {# item} other {# items}}";
        let parsed = parse_icu(message, IcuParserOptions::default()).expect("should parse");

        let IcuNode::Plural {
            value,
            options,
            plural_type,
            ..
        } = &parsed[0]
        else {
            panic!("expected plural node, got {:?}", parsed[0]);
        };

        assert_eq!(value, "count");
        assert_eq!(*plural_type, IcuPluralType::Cardinal);
        assert!(options.contains_key("one"));
        assert!(options.contains_key("other"));
    }

    #[test]
    fn parse_icu_parses_select_and_tags() {
        let message = "{gender, select, male {He} other {<b>They</b>}}";
        let parsed = parse_icu(message, IcuParserOptions::default()).expect("should parse");

        let IcuNode::Select { value, options } = &parsed[0] else {
            panic!("expected select node, got {:?}", parsed[0]);
        };

        assert_eq!(value, "gender");
        assert!(options.contains_key("male"));
        assert!(options.contains_key("other"));
    }

    #[test]
    fn parse_icu_handles_quotes_and_escaped_apostrophes() {
        // Quoted braces become literal text and `''` collapses to one apostrophe.
        let expected = vec![IcuNode::Literal {
            value: "This is a {placeholder} and it's fine".to_owned(),
        }];

        let parsed = parse_icu(
            "This is a '{placeholder}' and it''s fine",
            IcuParserOptions::default(),
        )
        .expect("should parse");

        assert_eq!(parsed, expected);
    }

    // --- Validation and analysis --------------------------------------------

    #[test]
    fn validate_icu_reports_missing_other_clause() {
        let strict = IcuParserOptions {
            requires_other_clause: true,
            ..IcuParserOptions::default()
        };

        let result = validate_icu("{n, plural, one {#}}", strict);

        assert!(!result.valid);
        assert!(result.errors[0].message.contains("Missing 'other' clause"));
    }

    #[test]
    fn extractors_and_predicates_work() {
        let message = "{name} has {count, plural, one {# item} other {# items}}";

        let names = extract_variables(message);
        assert_eq!(names, vec!["count".to_owned(), "name".to_owned()]);
        assert_eq!(extract_variable_info(message).len(), 2);

        assert!(has_plural(message));
        assert!(!has_select(message));
        assert!(!has_select_ordinal(message));
        assert!(has_icu_syntax(message));
    }

    #[test]
    fn compare_variables_detects_missing_and_extra() {
        let comparison = compare_variables("Hello {name}", "Hallo {userName}");

        assert_eq!(comparison.missing, vec!["name".to_owned()]);
        assert_eq!(comparison.extra, vec!["userName".to_owned()]);
        assert!(!comparison.is_match);
    }

    #[test]
    fn parse_icu_parses_selectordinal() {
        let message = "{n, selectordinal, one {#st} two {#nd} other {#th}}";
        let parsed = parse_icu(message, IcuParserOptions::default()).expect("should parse");

        let IcuNode::Plural { plural_type, .. } = &parsed[0] else {
            panic!("expected plural node, got {:?}", parsed[0]);
        };

        assert_eq!(*plural_type, IcuPluralType::Ordinal);
    }

    // --- Gettext conversion -------------------------------------------------

    /// Build a plural PO item with fixed English sources and the given
    /// translated forms.
    fn plural_item(msgstr: &[&str]) -> PoItem {
        let mut item = PoItem::new(2);
        item.msgid = "One item".to_owned();
        item.msgid_plural = Some("{count} items".to_owned());
        item.msgstr = msgstr.iter().copied().map(String::from).collect();
        item
    }

    #[test]
    fn gettext_to_icu_converts_plural_forms() {
        let item = plural_item(&["Ein Artikel", "{count} Artikel"]);

        let converted = gettext_to_icu(&item, &GettextToIcuOptions::new("de"));

        assert_eq!(
            converted.as_deref(),
            Some("{count, plural, one {Ein Artikel} other {{count} Artikel}}")
        );
    }

    #[test]
    fn gettext_to_icu_handles_multi_form_locales() {
        let item = plural_item(&["plik", "pliki", "plików", "pliki"]);

        let converted = gettext_to_icu(&item, &GettextToIcuOptions::new("pl"));

        assert_eq!(
            converted.as_deref(),
            Some("{count, plural, one {plik} few {pliki} many {plików} other {pliki}}")
        );
    }

    #[test]
    fn normalize_helpers_convert_plural_items() {
        let german_forms = ["Ein Artikel", "{count} Artikel"];

        // Single item: conversion collapses the forms into one msgstr entry.
        let mut item = plural_item(&german_forms);
        assert!(is_plural_item(&item));
        assert!(normalize_item_to_icu(
            &mut item,
            &GettextToIcuOptions::new("de")
        ));
        assert_eq!(item.msgstr.len(), 1);

        // Whole file: the cloning variant must leave the input untouched...
        let mut po = PoFile::new();
        po.items.push(plural_item(&german_forms));
        let cloned = normalize_to_icu(&po, &GettextToIcuOptions::new("de"));
        assert_ne!(po.items[0].msgstr, cloned.items[0].msgstr);

        // ...and the in-place variant must produce the same result.
        normalize_to_icu_in_place(&mut po, &GettextToIcuOptions::new("de"));
        assert_eq!(po.items[0].msgstr, cloned.items[0].msgstr);
    }

    #[test]
    fn icu_to_gettext_source_extracts_singular_and_plural() {
        let expected = (
            "{count} item".to_owned(),
            "{count} items".to_owned(),
            "count".to_owned(),
        );

        let source = icu_to_gettext_source("{count, plural, one {# item} other {# items}}", true);

        assert_eq!(source, Some(expected));
    }
}