Skip to main content

ferrocat_icu/
metadata.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use serde::{Deserialize, Serialize};
4
5use crate::{
6    IcuAnalysis, IcuArgumentKind, IcuDiagnosticSeverity, IcuParseError, IcuPluralKind,
7    IcuStyleKind, analyze_icu, parse_icu,
8};
9
/// Authoring input for semantic message metadata.
///
/// This is the progressive, JSON-friendly shape. Only `msgid` is required;
/// semantic fields may be omitted and derived from the ICU MessageFormat v1
/// source where possible.
///
/// Unknown fields are rejected during deserialization. For `args`, `tags`,
/// and `selectors`, `None` means "not authored": [`normalize_message_metadata`]
/// then uses the derived value unchanged and [`validate_message_metadata`]
/// skips that category entirely.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MessageMetadataInput {
    /// Exact catalog identity and authored source payload.
    ///
    /// This string is what gets parsed as ICU MessageFormat v1.
    pub msgid: String,
    /// Optional gettext-style context used to disambiguate identical `msgid`s.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub msgctxt: Option<String>,
    /// Optional translator-facing note.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Optional extraction origins. Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub origin: Vec<MessageOriginMetadata>,
    /// Optional argument metadata keyed by argument name.
    ///
    /// Each value is either a shorthand kind or a full metadata object.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub args: Option<BTreeMap<String, MessageArgumentMetadataInput>>,
    /// Optional rich-text tag names. Duplicates are collapsed on normalization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tags: Option<Vec<String>>,
    /// Optional selector metadata keyed by selecting argument name.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub selectors: Option<BTreeMap<String, MessageSelectorMetadata>>,
}
39
40impl MessageMetadataInput {
41    /// Creates a minimal metadata input from a `msgid`.
42    #[must_use]
43    pub fn new(msgid: impl Into<String>) -> Self {
44        Self {
45            msgid: msgid.into(),
46            msgctxt: None,
47            description: None,
48            origin: Vec::new(),
49            args: None,
50            tags: None,
51            selectors: None,
52        }
53    }
54}
55
/// Normalized semantic metadata for one source message.
///
/// Unlike [`MessageMetadataInput`], the semantic fields here are always
/// concrete collections (possibly empty) and arguments are always in full
/// object form. Empty collections and `None` fields are omitted when
/// serialized; unknown fields are rejected when deserialized.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MessageMetadata {
    /// Exact catalog identity and authored source payload.
    pub msgid: String,
    /// Optional gettext-style context used to disambiguate identical `msgid`s.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub msgctxt: Option<String>,
    /// Optional translator-facing note.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Optional extraction origins.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub origin: Vec<MessageOriginMetadata>,
    /// Normalized argument metadata keyed by argument name.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub args: BTreeMap<String, MessageArgumentMetadata>,
    /// Rich-text tag names in first-seen order, without duplicates.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,
    /// Normalized selector metadata keyed by selecting argument name.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub selectors: BTreeMap<String, MessageSelectorMetadata>,
}
81
/// Extraction origin attached to semantic message metadata.
///
/// Every field is optional, and fields that are `None` are left out of the
/// serialized form. Unknown fields are rejected when deserialized.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MessageOriginMetadata {
    /// Source file path, when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub file: Option<String>,
    /// Source line number, when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub line: Option<u32>,
    /// Host component name, when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub component: Option<String>,
    /// Host route or page identifier, when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub route: Option<String>,
}
99
/// Progressive authoring input for one argument.
///
/// The representation is untagged: a value is first tried as a bare kind
/// (for example the JSON string `"string"`) and, failing that, as a full
/// [`MessageArgumentMetadata`] object. The variant order therefore matters.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum MessageArgumentMetadataInput {
    /// Shorthand kind form, for example `"string"`.
    Kind(MessageArgumentKind),
    /// Full object form.
    Details(MessageArgumentMetadata),
}
109
110impl From<MessageArgumentKind> for MessageArgumentMetadataInput {
111    fn from(kind: MessageArgumentKind) -> Self {
112        Self::Kind(kind)
113    }
114}
115
116impl From<MessageArgumentMetadata> for MessageArgumentMetadataInput {
117    fn from(metadata: MessageArgumentMetadata) -> Self {
118        Self::Details(metadata)
119    }
120}
121
122impl MessageArgumentMetadataInput {
123    fn into_metadata(self) -> MessageArgumentMetadata {
124        match self {
125            Self::Kind(kind) => MessageArgumentMetadata {
126                kind,
127                ..MessageArgumentMetadata::default()
128            },
129            Self::Details(metadata) => metadata,
130        }
131    }
132}
133
/// Normalized semantic metadata for one message argument.
///
/// Every field is optional on input: a missing `kind` deserializes to the
/// kind's default, and the remaining fields default to empty. Empty fields are
/// omitted when serialized, but `kind` is always written. Unknown fields are
/// rejected when deserialized.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MessageArgumentMetadata {
    /// Broad message-level data kind.
    #[serde(default)]
    pub kind: MessageArgumentKind,
    /// Optional semantic role such as `count`, `currency`, or `url`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    /// Allowed enum/select values when known from extraction.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub values: Vec<String>,
    /// Optional formatter metadata.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub format: Option<MessageArgumentFormatMetadata>,
}
151
152impl Default for MessageArgumentMetadata {
153    fn default() -> Self {
154        Self {
155            kind: MessageArgumentKind::Unknown,
156            role: None,
157            values: Vec::new(),
158            format: None,
159        }
160    }
161}
162
/// Broad argument kind used by semantic message metadata.
///
/// Each variant is serialized as its lowercased name, so `RelativeTime`
/// becomes `"relativetime"` and `Datetime` becomes `"datetime"`. The derived
/// ordering follows declaration order, so reordering variants changes how
/// values compare.
///
/// Derivation from ICU source in this file never yields `String`, `Datetime`,
/// or `Boolean`; those kinds only appear when authored explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MessageArgumentKind {
    /// Text or string-like value.
    String,
    /// Number-like value.
    Number,
    /// Date value.
    Date,
    /// Time value.
    Time,
    /// Date-time value.
    Datetime,
    /// Boolean value.
    Boolean,
    /// Enumerated value.
    Enum,
    /// List value.
    List,
    /// Duration value.
    Duration,
    /// Relative-time value.
    RelativeTime,
    /// Person, region, or display name value.
    Name,
    /// Unknown or intentionally unspecified value. This is the default kind.
    #[default]
    Unknown,
}
193
/// Formatter metadata attached to an argument.
///
/// Both fields are optional and omitted from the serialized form when `None`.
/// Unknown fields are rejected when deserialized.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MessageArgumentFormatMetadata {
    /// Raw formatter style, when present.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style: Option<String>,
    /// Classification of the formatter style.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_kind: Option<MessageFormatStyleKind>,
}
205
/// Classification of a message formatter style.
///
/// Variants are serialized as their lowercased names (`"none"`,
/// `"predefined"`, `"skeleton"`, `"pattern"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MessageFormatStyleKind {
    /// Formatter has no style segment.
    None,
    /// Formatter uses a known named style.
    Predefined,
    /// Formatter uses an ICU skeleton.
    Skeleton,
    /// Formatter uses an opaque pattern-style segment.
    Pattern,
}
219
/// Selector metadata attached to a selecting argument.
///
/// `kind` is required; `cases` and `offset` are optional and omitted from the
/// serialized form when empty. Unknown fields are rejected when deserialized.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MessageSelectorMetadata {
    /// Selector expression kind.
    pub kind: MessageSelectorKind,
    /// Known selector cases in source order.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub cases: Vec<String>,
    /// Optional plural offset. Omitted when zero.
    ///
    /// Metadata derived from ICU source stores `None` for a zero offset and
    /// for plain select expressions.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub offset: Option<u32>,
}
233
/// Selector kind used by semantic message metadata.
///
/// Serialized as `"select"`, `"plural"`, or `"selectordinal"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MessageSelectorKind {
    /// General select expression.
    Select,
    /// Cardinal plural expression.
    Plural,
    /// Ordinal plural expression.
    #[serde(rename = "selectordinal")]
    SelectOrdinal,
}
246
/// One diagnostic emitted while validating semantic message metadata.
///
/// Diagnostics produced in this file use codes prefixed with `metadata.`,
/// such as `metadata.invalid_msgid` or `metadata.extra_argument`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MessageMetadataDiagnostic {
    /// Severity for the diagnostic.
    pub severity: IcuDiagnosticSeverity,
    /// Stable machine-readable diagnostic code.
    pub code: String,
    /// Human-readable explanation of the condition.
    pub message: String,
    /// Argument, selector, tag, or field name associated with the diagnostic.
    ///
    /// Selector-case diagnostics use the form `selector:case`.
    pub name: Option<String>,
}
259
260impl MessageMetadataDiagnostic {
261    fn new(
262        severity: IcuDiagnosticSeverity,
263        code: &'static str,
264        message: impl Into<String>,
265        name: impl Into<Option<String>>,
266    ) -> Self {
267        Self {
268            severity,
269            code: code.to_owned(),
270            message: message.into(),
271            name: name.into(),
272        }
273    }
274}
275
/// Report returned by [`validate_message_metadata`].
///
/// The default report has no diagnostics, which means nothing was found.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct MessageMetadataValidationReport {
    /// Diagnostics found while validating the metadata, in emission order.
    pub diagnostics: Vec<MessageMetadataDiagnostic>,
}
282
283impl MessageMetadataValidationReport {
284    /// Returns `true` when the report contains at least one error diagnostic.
285    #[must_use]
286    pub fn has_errors(&self) -> bool {
287        self.diagnostics
288            .iter()
289            .any(|diagnostic| diagnostic.severity == IcuDiagnosticSeverity::Error)
290    }
291}
292
293/// Derives normalized semantic metadata from an ICU MessageFormat v1 `msgid`.
294///
295/// Plain text messages are valid and produce metadata without arguments, tags,
296/// or selectors.
297///
298/// # Errors
299///
300/// Returns an ICU parse error when `msgid` is not valid ICU MessageFormat v1.
301pub fn derive_message_metadata_from_icu(
302    msgid: &str,
303    msgctxt: Option<&str>,
304) -> Result<MessageMetadata, IcuParseError> {
305    let message = parse_icu(msgid)?;
306    let analysis = analyze_icu(&message);
307    Ok(MessageMetadata {
308        msgid: msgid.to_owned(),
309        msgctxt: msgctxt.map(str::to_owned),
310        description: None,
311        origin: Vec::new(),
312        args: derive_args(&analysis),
313        tags: unique_strings(analysis.tags.iter().map(|tag| tag.name.as_str())),
314        selectors: derive_selectors(&analysis),
315    })
316}
317
318/// Normalizes progressive semantic message metadata into canonical object form.
319///
320/// Omitted semantic fields are derived from `msgid`. Explicit metadata is kept
321/// and enriched with any derived facts that do not conflict.
322///
323/// # Errors
324///
325/// Returns an ICU parse error when `input.msgid` is not valid ICU MessageFormat
326/// v1.
327pub fn normalize_message_metadata(
328    input: MessageMetadataInput,
329) -> Result<MessageMetadata, IcuParseError> {
330    let mut derived = derive_message_metadata_from_icu(&input.msgid, input.msgctxt.as_deref())?;
331    derived.description = input.description;
332    derived.origin = input.origin;
333
334    if let Some(args) = input.args {
335        let mut normalized_args = args
336            .into_iter()
337            .map(|(name, argument)| (name, argument.into_metadata()))
338            .collect::<BTreeMap<_, _>>();
339        for (name, argument) in derived.args {
340            normalized_args.entry(name).or_insert(argument);
341        }
342        derived.args = normalized_args;
343    }
344
345    if let Some(tags) = input.tags {
346        let mut normalized_tags = unique_strings(tags.iter().map(String::as_str));
347        for tag in derived.tags {
348            if !normalized_tags.contains(&tag) {
349                normalized_tags.push(tag);
350            }
351        }
352        derived.tags = normalized_tags;
353    }
354
355    if let Some(selectors) = input.selectors {
356        let mut normalized_selectors = selectors;
357        for (name, selector) in derived.selectors {
358            normalized_selectors.entry(name).or_insert(selector);
359        }
360        derived.selectors = normalized_selectors;
361    }
362
363    Ok(derived)
364}
365
366/// Validates progressive semantic message metadata against its `msgid`.
367#[must_use]
368pub fn validate_message_metadata(input: &MessageMetadataInput) -> MessageMetadataValidationReport {
369    let Ok(derived) = derive_message_metadata_from_icu(&input.msgid, input.msgctxt.as_deref())
370    else {
371        return MessageMetadataValidationReport {
372            diagnostics: vec![MessageMetadataDiagnostic::new(
373                IcuDiagnosticSeverity::Error,
374                "metadata.invalid_msgid",
375                "Message metadata `msgid` is not valid ICU MessageFormat v1.",
376                Some("msgid".to_owned()),
377            )],
378        };
379    };
380
381    let mut report = MessageMetadataValidationReport::default();
382    if let Some(args) = &input.args {
383        validate_args(args, &derived.args, &mut report);
384    }
385    if let Some(tags) = &input.tags {
386        validate_tags(tags, &derived.tags, &mut report);
387    }
388    if let Some(selectors) = &input.selectors {
389        validate_selectors(selectors, &derived.selectors, &mut report);
390    }
391    report
392}
393
394fn derive_args(analysis: &IcuAnalysis) -> BTreeMap<String, MessageArgumentMetadata> {
395    let mut args = BTreeMap::<String, MessageArgumentMetadata>::new();
396    for argument in &analysis.arguments {
397        args.entry(argument.name.clone())
398            .and_modify(|metadata| merge_icu_argument(metadata, argument.kind))
399            .or_insert_with(|| metadata_for_icu_argument(argument.kind));
400    }
401    for formatter in &analysis.formatters {
402        args.entry(formatter.name.clone())
403            .and_modify(|metadata| {
404                merge_icu_argument(metadata, formatter.kind);
405                metadata.format = Some(MessageArgumentFormatMetadata {
406                    style: formatter.style.clone(),
407                    style_kind: Some(style_kind(formatter.style_kind)),
408                });
409            })
410            .or_insert_with(|| {
411                let mut metadata = metadata_for_icu_argument(formatter.kind);
412                metadata.format = Some(MessageArgumentFormatMetadata {
413                    style: formatter.style.clone(),
414                    style_kind: Some(style_kind(formatter.style_kind)),
415                });
416                metadata
417            });
418    }
419    for select in &analysis.selects {
420        let metadata = args.entry(select.name.clone()).or_default();
421        metadata.kind = MessageArgumentKind::Enum;
422        metadata.values = select.selectors.clone();
423    }
424    for plural in &analysis.plurals {
425        let metadata = args.entry(plural.name.clone()).or_default();
426        match plural.kind {
427            IcuPluralKind::Cardinal => {
428                metadata.kind = MessageArgumentKind::Number;
429                metadata.role.get_or_insert_with(|| "count".to_owned());
430            }
431            IcuPluralKind::Ordinal => {
432                metadata.kind = MessageArgumentKind::Number;
433                metadata.role.get_or_insert_with(|| "ordinal".to_owned());
434            }
435        }
436    }
437    args
438}
439
440fn merge_icu_argument(metadata: &mut MessageArgumentMetadata, kind: IcuArgumentKind) {
441    let next = argument_kind(kind);
442    if metadata.kind == MessageArgumentKind::Unknown || next != MessageArgumentKind::Unknown {
443        metadata.kind = next;
444    }
445}
446
447fn metadata_for_icu_argument(kind: IcuArgumentKind) -> MessageArgumentMetadata {
448    MessageArgumentMetadata {
449        kind: argument_kind(kind),
450        role: role_for_icu_argument(kind),
451        values: Vec::new(),
452        format: None,
453    }
454}
455
456fn argument_kind(kind: IcuArgumentKind) -> MessageArgumentKind {
457    match kind {
458        IcuArgumentKind::Argument => MessageArgumentKind::Unknown,
459        IcuArgumentKind::Number | IcuArgumentKind::Plural | IcuArgumentKind::SelectOrdinal => {
460            MessageArgumentKind::Number
461        }
462        IcuArgumentKind::Date => MessageArgumentKind::Date,
463        IcuArgumentKind::Time => MessageArgumentKind::Time,
464        IcuArgumentKind::List => MessageArgumentKind::List,
465        IcuArgumentKind::Duration => MessageArgumentKind::Duration,
466        IcuArgumentKind::Ago => MessageArgumentKind::RelativeTime,
467        IcuArgumentKind::Name => MessageArgumentKind::Name,
468        IcuArgumentKind::Select => MessageArgumentKind::Enum,
469    }
470}
471
472fn role_for_icu_argument(kind: IcuArgumentKind) -> Option<String> {
473    match kind {
474        IcuArgumentKind::Plural => Some("count".to_owned()),
475        IcuArgumentKind::SelectOrdinal => Some("ordinal".to_owned()),
476        _ => None,
477    }
478}
479
480fn style_kind(kind: IcuStyleKind) -> MessageFormatStyleKind {
481    match kind {
482        IcuStyleKind::None => MessageFormatStyleKind::None,
483        IcuStyleKind::Predefined => MessageFormatStyleKind::Predefined,
484        IcuStyleKind::Skeleton => MessageFormatStyleKind::Skeleton,
485        IcuStyleKind::Pattern => MessageFormatStyleKind::Pattern,
486    }
487}
488
489fn derive_selectors(analysis: &IcuAnalysis) -> BTreeMap<String, MessageSelectorMetadata> {
490    let mut selectors = BTreeMap::new();
491    for select in &analysis.selects {
492        selectors.insert(
493            select.name.clone(),
494            MessageSelectorMetadata {
495                kind: MessageSelectorKind::Select,
496                cases: select.selectors.clone(),
497                offset: None,
498            },
499        );
500    }
501    for plural in &analysis.plurals {
502        selectors.insert(
503            plural.name.clone(),
504            MessageSelectorMetadata {
505                kind: match plural.kind {
506                    IcuPluralKind::Cardinal => MessageSelectorKind::Plural,
507                    IcuPluralKind::Ordinal => MessageSelectorKind::SelectOrdinal,
508                },
509                cases: plural.selectors.clone(),
510                offset: (plural.offset != 0).then_some(plural.offset),
511            },
512        );
513    }
514    selectors
515}
516
517fn validate_args(
518    input: &BTreeMap<String, MessageArgumentMetadataInput>,
519    derived: &BTreeMap<String, MessageArgumentMetadata>,
520    report: &mut MessageMetadataValidationReport,
521) {
522    let normalized = input
523        .iter()
524        .map(|(name, argument)| (name, argument.clone().into_metadata()))
525        .collect::<BTreeMap<_, _>>();
526
527    for name in derived.keys() {
528        if !normalized.contains_key(name) {
529            report.diagnostics.push(MessageMetadataDiagnostic::new(
530                IcuDiagnosticSeverity::Warning,
531                "metadata.missing_argument",
532                format!("Message metadata is missing parsed ICU argument `{name}`."),
533                Some(name.clone()),
534            ));
535        }
536    }
537    for (name, argument) in &normalized {
538        let Some(derived_argument) = derived.get(*name) else {
539            report.diagnostics.push(MessageMetadataDiagnostic::new(
540                IcuDiagnosticSeverity::Error,
541                "metadata.extra_argument",
542                format!("Message metadata declares argument `{name}` that is not used by `msgid`."),
543                Some((*name).clone()),
544            ));
545            continue;
546        };
547        if argument.kind != MessageArgumentKind::Unknown
548            && derived_argument.kind != MessageArgumentKind::Unknown
549            && argument.kind != derived_argument.kind
550        {
551            report.diagnostics.push(MessageMetadataDiagnostic::new(
552                IcuDiagnosticSeverity::Error,
553                "metadata.argument_kind_mismatch",
554                format!(
555                    "Message metadata declares argument `{name}` as {:?}, but `msgid` uses {:?}.",
556                    argument.kind, derived_argument.kind
557                ),
558                Some((*name).clone()),
559            ));
560        }
561    }
562}
563
564fn validate_tags(
565    input: &[String],
566    derived: &[String],
567    report: &mut MessageMetadataValidationReport,
568) {
569    let input = input.iter().cloned().collect::<BTreeSet<_>>();
570    let derived = derived.iter().cloned().collect::<BTreeSet<_>>();
571    for tag in &derived {
572        if !input.contains(tag) {
573            report.diagnostics.push(MessageMetadataDiagnostic::new(
574                IcuDiagnosticSeverity::Warning,
575                "metadata.missing_tag",
576                format!("Message metadata is missing parsed ICU tag `{tag}`."),
577                Some(tag.clone()),
578            ));
579        }
580    }
581    for tag in &input {
582        if !derived.contains(tag) {
583            report.diagnostics.push(MessageMetadataDiagnostic::new(
584                IcuDiagnosticSeverity::Error,
585                "metadata.extra_tag",
586                format!("Message metadata declares tag `{tag}` that is not used by `msgid`."),
587                Some(tag.clone()),
588            ));
589        }
590    }
591}
592
593fn validate_selectors(
594    input: &BTreeMap<String, MessageSelectorMetadata>,
595    derived: &BTreeMap<String, MessageSelectorMetadata>,
596    report: &mut MessageMetadataValidationReport,
597) {
598    for name in derived.keys() {
599        if !input.contains_key(name) {
600            report.diagnostics.push(MessageMetadataDiagnostic::new(
601                IcuDiagnosticSeverity::Warning,
602                "metadata.missing_selector",
603                format!("Message metadata is missing parsed ICU selector `{name}`."),
604                Some(name.clone()),
605            ));
606        }
607    }
608    for (name, selector) in input {
609        let Some(derived_selector) = derived.get(name) else {
610            report.diagnostics.push(MessageMetadataDiagnostic::new(
611                IcuDiagnosticSeverity::Error,
612                "metadata.extra_selector",
613                format!("Message metadata declares selector `{name}` that is not used by `msgid`."),
614                Some(name.clone()),
615            ));
616            continue;
617        };
618        if selector.kind != derived_selector.kind {
619            report.diagnostics.push(MessageMetadataDiagnostic::new(
620                IcuDiagnosticSeverity::Error,
621                "metadata.selector_kind_mismatch",
622                format!(
623                    "Message metadata declares selector `{name}` as {:?}, but `msgid` uses {:?}.",
624                    selector.kind, derived_selector.kind
625                ),
626                Some(name.clone()),
627            ));
628        }
629        let derived_cases = derived_selector
630            .cases
631            .iter()
632            .cloned()
633            .collect::<BTreeSet<_>>();
634        let input_cases = selector.cases.iter().cloned().collect::<BTreeSet<_>>();
635        for case in &derived_selector.cases {
636            if !input_cases.contains(case) {
637                report.diagnostics.push(MessageMetadataDiagnostic::new(
638                    IcuDiagnosticSeverity::Warning,
639                    "metadata.missing_selector_case",
640                    format!(
641                        "Message metadata is missing parsed ICU selector case `{case}` for `{name}`."
642                    ),
643                    Some(format!("{name}:{case}")),
644                ));
645            }
646        }
647        for case in &selector.cases {
648            if !derived_cases.contains(case) {
649                report.diagnostics.push(MessageMetadataDiagnostic::new(
650                    IcuDiagnosticSeverity::Error,
651                    "metadata.extra_selector_case",
652                    format!(
653                        "Message metadata declares selector case `{case}` for `{name}` that is not used by `msgid`."
654                    ),
655                    Some(format!("{name}:{case}")),
656                ));
657            }
658        }
659        if selector.offset != derived_selector.offset {
660            report.diagnostics.push(MessageMetadataDiagnostic::new(
661                IcuDiagnosticSeverity::Error,
662                "metadata.selector_offset_mismatch",
663                format!(
664                    "Message metadata declares selector `{name}` offset {:?}, but `msgid` uses {:?}.",
665                    selector.offset, derived_selector.offset
666                ),
667                Some(name.clone()),
668            ));
669        }
670    }
671}
672
/// Collects the distinct strings from `values`, keeping first-seen order.
///
/// Only the first occurrence of each string is kept. The seen-set borrows
/// from the input, so a `String` is allocated once per distinct value and
/// never for duplicates.
fn unique_strings<'a>(values: impl IntoIterator<Item = &'a str>) -> Vec<String> {
    let mut seen: BTreeSet<&'a str> = BTreeSet::new();
    values
        .into_iter()
        // `insert` returns `false` for a value that is already present.
        .filter(|value| seen.insert(*value))
        .map(str::to_owned)
        .collect()
}
683
684#[cfg(test)]
685mod tests {
686    use std::collections::BTreeMap;
687
688    use crate::{
689        IcuDiagnosticSeverity, MessageArgumentFormatMetadata, MessageArgumentKind,
690        MessageArgumentMetadata, MessageArgumentMetadataInput, MessageFormatStyleKind,
691        MessageMetadataInput, MessageSelectorKind, MessageSelectorMetadata,
692        derive_message_metadata_from_icu, normalize_message_metadata, validate_message_metadata,
693    };
694
695    #[test]
696    fn minimal_metadata_normalizes_without_semantic_fields() {
697        let input = MessageMetadataInput::new("Cart");
698
699        let metadata = normalize_message_metadata(input).expect("normalize metadata");
700
701        assert_eq!(metadata.msgid, "Cart");
702        assert!(metadata.args.is_empty());
703        assert!(metadata.tags.is_empty());
704        assert!(metadata.selectors.is_empty());
705    }
706
707    #[test]
708    fn placeholder_msgid_derives_argument_when_omitted() {
709        let metadata =
710            derive_message_metadata_from_icu("Hello {name}", None).expect("derive metadata");
711
712        assert_eq!(
713            metadata.args.get("name").map(|argument| argument.kind),
714            Some(MessageArgumentKind::Unknown)
715        );
716    }
717
718    #[test]
719    fn shorthand_argument_input_normalizes_to_object_metadata() {
720        let mut args = BTreeMap::new();
721        args.insert(
722            "name".to_owned(),
723            MessageArgumentMetadataInput::Kind(MessageArgumentKind::String),
724        );
725        let mut input = MessageMetadataInput::new("Hello {name}");
726        input.args = Some(args);
727
728        let metadata = normalize_message_metadata(input).expect("normalize metadata");
729
730        assert_eq!(
731            metadata.args.get("name").map(|argument| argument.kind),
732            Some(MessageArgumentKind::String)
733        );
734    }
735
736    #[test]
737    fn msgctxt_and_msgid_remain_exact_source_identity() {
738        let mut input = MessageMetadataInput::new("Home");
739        input.msgctxt = Some("navigation".to_owned());
740
741        let metadata = normalize_message_metadata(input).expect("normalize metadata");
742
743        assert_eq!(metadata.msgid, "Home");
744        assert_eq!(metadata.msgctxt.as_deref(), Some("navigation"));
745    }
746
747    #[test]
748    fn plural_msgid_derives_count_argument_and_selector_cases() {
749        let metadata = derive_message_metadata_from_icu(
750            "{count, plural, one {One item} other {# items}}",
751            None,
752        )
753        .expect("derive metadata");
754
755        let count = metadata.args.get("count").expect("count metadata");
756        assert_eq!(count.kind, MessageArgumentKind::Number);
757        assert_eq!(count.role.as_deref(), Some("count"));
758        let selector = metadata.selectors.get("count").expect("count selector");
759        assert_eq!(selector.kind, MessageSelectorKind::Plural);
760        assert_eq!(selector.cases, vec!["one", "other"]);
761        assert_eq!(selector.offset, None);
762    }
763
764    #[test]
765    fn select_msgid_derives_enum_argument_and_selector_cases() {
766        let metadata = derive_message_metadata_from_icu(
767            "{status, select, shipped {Shipped} cancelled {Cancelled} other {Updated}}",
768            None,
769        )
770        .expect("derive metadata");
771
772        let status = metadata.args.get("status").expect("status argument");
773        assert_eq!(status.kind, MessageArgumentKind::Enum);
774        assert_eq!(status.values, vec!["shipped", "cancelled", "other"]);
775        assert_eq!(
776            metadata
777                .selectors
778                .get("status")
779                .map(|selector| selector.kind),
780            Some(MessageSelectorKind::Select)
781        );
782    }
783
784    #[test]
785    fn rich_text_tags_derive_into_metadata() {
786        let metadata =
787            derive_message_metadata_from_icu("Read <link>terms</link>", None).expect("derive");
788
789        assert_eq!(metadata.tags, vec!["link"]);
790    }
791
792    #[test]
793    fn conflicting_explicit_metadata_emits_diagnostics() {
794        let mut args = BTreeMap::new();
795        args.insert(
796            "count".to_owned(),
797            MessageArgumentMetadataInput::Kind(MessageArgumentKind::String),
798        );
799        args.insert(
800            "unused".to_owned(),
801            MessageArgumentMetadataInput::Kind(MessageArgumentKind::String),
802        );
803        let mut input =
804            MessageMetadataInput::new("{count, plural, one {One item} other {# items}}");
805        input.args = Some(args);
806
807        let report = validate_message_metadata(&input);
808        let codes = report
809            .diagnostics
810            .iter()
811            .map(|diagnostic| diagnostic.code.as_str())
812            .collect::<Vec<_>>();
813
814        assert!(report.has_errors());
815        assert!(codes.contains(&"metadata.argument_kind_mismatch"));
816        assert!(codes.contains(&"metadata.extra_argument"));
817        assert!(
818            report
819                .diagnostics
820                .iter()
821                .any(|diagnostic| diagnostic.severity == IcuDiagnosticSeverity::Error)
822        );
823    }
824
825    #[test]
826    fn selector_metadata_reports_missing_source_cases() {
827        let mut selectors = BTreeMap::new();
828        selectors.insert(
829            "count".to_owned(),
830            MessageSelectorMetadata {
831                kind: MessageSelectorKind::Plural,
832                cases: vec!["one".to_owned()],
833                offset: None,
834            },
835        );
836        let mut input =
837            MessageMetadataInput::new("{count, plural, one {One item} other {# items}}");
838        input.selectors = Some(selectors);
839
840        let report = validate_message_metadata(&input);
841
842        assert!(report.diagnostics.iter().any(|diagnostic| {
843            diagnostic.code == "metadata.missing_selector_case"
844                && diagnostic.name.as_deref() == Some("count:other")
845        }));
846    }
847
848    #[test]
849    fn id_style_msgid_is_accepted_without_special_behavior() {
850        let input = MessageMetadataInput::new("cart.item_count");
851
852        let report = validate_message_metadata(&input);
853
854        assert!(report.diagnostics.is_empty());
855    }
856
857    #[test]
858    fn json_shorthand_argument_deserializes() {
859        let input: MessageMetadataInput = serde_json::from_str(
860            r#"{
861                "msgid": "Hello {name}",
862                "args": {
863                    "name": "string"
864                }
865            }"#,
866        )
867        .expect("deserialize metadata input");
868
869        let metadata = normalize_message_metadata(input).expect("normalize metadata");
870
871        assert_eq!(
872            metadata.args.get("name").map(|argument| argument.kind),
873            Some(MessageArgumentKind::String)
874        );
875    }
876
877    #[test]
878    fn explicit_details_origin_tags_and_selectors_are_preserved_and_enriched() {
879        let mut args = BTreeMap::new();
880        args.insert(
881            "name".to_owned(),
882            MessageArgumentMetadata {
883                kind: MessageArgumentKind::String,
884                ..MessageArgumentMetadata::default()
885            }
886            .into(),
887        );
888        let mut selectors = BTreeMap::new();
889        selectors.insert(
890            "status".to_owned(),
891            MessageSelectorMetadata {
892                kind: MessageSelectorKind::Select,
893                cases: vec!["open".to_owned(), "other".to_owned()],
894                offset: None,
895            },
896        );
897        let mut input = MessageMetadataInput::new(
898            "<link>{status, select, open {Hello {name}} other {Done}}</link> <strong>!</strong>",
899        );
900        input.description = Some("Shown in the activity feed.".to_owned());
901        input.origin.push(crate::MessageOriginMetadata {
902            file: Some("src/app.rs".to_owned()),
903            line: Some(12),
904            component: Some("ActivityFeed".to_owned()),
905            route: Some("/activity".to_owned()),
906        });
907        input.args = Some(args);
908        input.tags = Some(vec!["link".to_owned(), "link".to_owned()]);
909        input.selectors = Some(selectors);
910
911        let metadata = normalize_message_metadata(input).expect("normalize metadata");
912
913        assert_eq!(
914            metadata.args.get("name").map(|argument| argument.kind),
915            Some(MessageArgumentKind::String)
916        );
917        assert_eq!(
918            metadata.args.get("status").map(|argument| argument.kind),
919            Some(MessageArgumentKind::Enum)
920        );
921        assert_eq!(metadata.tags, vec!["link", "strong"]);
922        assert_eq!(
923            metadata.description.as_deref(),
924            Some("Shown in the activity feed.")
925        );
926        assert_eq!(metadata.origin[0].file.as_deref(), Some("src/app.rs"));
927        assert_eq!(
928            metadata
929                .selectors
930                .get("status")
931                .map(|selector| selector.cases.as_slice()),
932            Some(&["open".to_owned(), "other".to_owned()][..])
933        );
934    }
935
936    #[test]
937    fn formatter_metadata_derives_kinds_styles_roles_and_selector_offsets() {
938        let metadata = derive_message_metadata_from_icu(
939            "{price, number, ::currency/USD} {created, date, short} {time, time, HH:mm} \
940             {items, list, conjunction} {elapsed, duration} {since, ago} {person, name} \
941             {rank, selectordinal, offset:1 one {#st} other {#th}}",
942            None,
943        )
944        .expect("derive metadata");
945
946        assert_eq!(
947            metadata
948                .args
949                .get("price")
950                .and_then(|argument| argument.format.as_ref()),
951            Some(&MessageArgumentFormatMetadata {
952                style: Some("::currency/USD".to_owned()),
953                style_kind: Some(MessageFormatStyleKind::Skeleton),
954            })
955        );
956        assert_eq!(
957            metadata
958                .args
959                .get("created")
960                .and_then(|argument| argument.format.as_ref()),
961            Some(&MessageArgumentFormatMetadata {
962                style: Some("short".to_owned()),
963                style_kind: Some(MessageFormatStyleKind::Predefined),
964            })
965        );
966        assert_eq!(
967            metadata
968                .args
969                .get("time")
970                .and_then(|argument| argument.format.as_ref()),
971            Some(&MessageArgumentFormatMetadata {
972                style: Some("HH:mm".to_owned()),
973                style_kind: Some(MessageFormatStyleKind::Pattern),
974            })
975        );
976        assert_eq!(
977            metadata.args.get("items").map(|argument| argument.kind),
978            Some(MessageArgumentKind::List)
979        );
980        assert_eq!(
981            metadata.args.get("elapsed").map(|argument| argument.kind),
982            Some(MessageArgumentKind::Duration)
983        );
984        assert_eq!(
985            metadata.args.get("since").map(|argument| argument.kind),
986            Some(MessageArgumentKind::RelativeTime)
987        );
988        assert_eq!(
989            metadata.args.get("person").map(|argument| argument.kind),
990            Some(MessageArgumentKind::Name)
991        );
992        let rank = metadata.args.get("rank").expect("rank argument");
993        assert_eq!(rank.kind, MessageArgumentKind::Number);
994        assert_eq!(rank.role.as_deref(), Some("ordinal"));
995        let selector = metadata.selectors.get("rank").expect("rank selector");
996        assert_eq!(selector.kind, MessageSelectorKind::SelectOrdinal);
997        assert_eq!(selector.offset, Some(1));
998    }
999
1000    #[test]
1001    fn invalid_msgid_reports_metadata_diagnostic() {
1002        let input = MessageMetadataInput::new("{count, plural, one {One item}}");
1003
1004        let report = validate_message_metadata(&input);
1005
1006        assert_eq!(
1007            report
1008                .diagnostics
1009                .iter()
1010                .map(|diagnostic| diagnostic.code.as_str())
1011                .collect::<Vec<_>>(),
1012            vec!["metadata.invalid_msgid"]
1013        );
1014        assert!(report.has_errors());
1015    }
1016
1017    #[test]
1018    fn validation_reports_missing_and_extra_tags_and_selectors() {
1019        let mut selectors = BTreeMap::new();
1020        selectors.insert(
1021            "status".to_owned(),
1022            MessageSelectorMetadata {
1023                kind: MessageSelectorKind::Plural,
1024                cases: vec!["open".to_owned(), "closed".to_owned(), "other".to_owned()],
1025                offset: Some(1),
1026            },
1027        );
1028        selectors.insert(
1029            "unused".to_owned(),
1030            MessageSelectorMetadata {
1031                kind: MessageSelectorKind::Select,
1032                cases: vec!["other".to_owned()],
1033                offset: None,
1034            },
1035        );
1036        let mut input =
1037            MessageMetadataInput::new("<link>{status, select, open {Open} other {Other}}</link>");
1038        input.tags = Some(vec!["button".to_owned()]);
1039        input.selectors = Some(selectors);
1040
1041        let report = validate_message_metadata(&input);
1042        let codes = report
1043            .diagnostics
1044            .iter()
1045            .map(|diagnostic| diagnostic.code.as_str())
1046            .collect::<Vec<_>>();
1047
1048        assert!(report.has_errors());
1049        assert!(codes.contains(&"metadata.missing_tag"));
1050        assert!(codes.contains(&"metadata.extra_tag"));
1051        assert!(codes.contains(&"metadata.selector_kind_mismatch"));
1052        assert!(codes.contains(&"metadata.extra_selector"));
1053        assert!(codes.contains(&"metadata.extra_selector_case"));
1054        assert!(codes.contains(&"metadata.selector_offset_mismatch"));
1055    }
1056
1057    #[test]
1058    fn validation_reports_missing_argument_tag_and_selector_metadata() {
1059        let mut input = MessageMetadataInput::new(
1060            "<link>{count, plural, one {{name} has one item} other {{name} has # items}}</link>",
1061        );
1062        input.args = Some(BTreeMap::new());
1063        input.tags = Some(Vec::new());
1064        input.selectors = Some(BTreeMap::new());
1065
1066        let report = validate_message_metadata(&input);
1067        let codes = report
1068            .diagnostics
1069            .iter()
1070            .map(|diagnostic| diagnostic.code.as_str())
1071            .collect::<Vec<_>>();
1072
1073        assert!(!report.has_errors());
1074        assert!(codes.contains(&"metadata.missing_argument"));
1075        assert!(codes.contains(&"metadata.missing_tag"));
1076        assert!(codes.contains(&"metadata.missing_selector"));
1077    }
1078}