Skip to main content

citum_schema_style/locale/
mod.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus and Citum contributors
4*/
5
6//! Locale definitions for Citum.
7//!
8//! Locales provide language-specific terms, date formats, and punctuation rules
9//! for citation formatting.
10
11mod embedded;
12/// Locator text normalization.
13pub mod locator;
14/// Message evaluation for parameterized locale strings.
15pub mod message;
16mod message_ids;
17/// Raw locale types used during locale file parsing.
18pub mod raw;
19mod raw_conversion;
20/// Structured locale types used by the processor.
21pub mod types;
22
23use crate::citation::LocatorType;
24use crate::template::ContributorRole;
25pub use message::{MessageArgs, MessageEvaluator, Mf2MessageEvaluator};
26pub use raw::{RawLocale, RawTermValue};
27#[cfg(feature = "schema")]
28use schemars::JsonSchema;
29use serde::{Deserialize, Serialize};
30use std::collections::HashMap;
31use std::fmt;
32use std::sync::Arc;
33pub use types::*;
34
35/// A list of month names (12 elements for Jan-Dec).
36pub type MonthList = Vec<String>;
37
/// Identifies a field in the archive hierarchy for locale term lookup.
///
/// Each variant corresponds to one `term.archive-*-label` message ID, which
/// `Locale::resolved_archive_term` uses to look up the field's display label.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArchiveHierarchyField {
    /// Named collection or record group.
    Collection,
    /// Named series or sub-collection.
    Series,
    /// Box or container designation.
    Box,
    /// Folder designation.
    Folder,
    /// Item, file, or reference-code designation.
    Item,
}
52
53impl ArchiveHierarchyField {
54    /// Returns the MF2 message ID for this field's locale label.
55    fn message_id(self) -> &'static str {
56        match self {
57            Self::Collection => "term.archive-collection-label",
58            Self::Series => "term.archive-series-label",
59            Self::Box => "term.archive-box-label",
60            Self::Folder => "term.archive-folder-label",
61            Self::Item => "term.archive-item-label",
62        }
63    }
64}
65
/// A locale definition containing language-specific terms and formatting rules.
///
/// The `evaluator` field holds the message evaluation engine, selected based on
/// `evaluation.message_syntax`. This allows for trait-based swapping to ICU4X
/// implementations in the future without changing call sites.
//
// NOTE(review): with the `schema` feature enabled, the `///` comments in this
// struct presumably end up as JSON Schema descriptions (schemars derive), so
// maintainer-only remarks below are plain `//` comments — confirm before
// rewording any `///` text.
//
// `Debug` is implemented by hand further down: it prints a fixed placeholder
// for `evaluator` instead of the trait object.
#[derive(Clone, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
#[serde(rename_all = "kebab-case")]
pub struct Locale {
    /// The locale identifier (e.g., "en-US", "de-DE").
    #[cfg_attr(feature = "schema", schemars(skip))]
    pub locale: String,
    /// Date-related terms (months, seasons).
    #[serde(default)]
    pub dates: DateTerms,
    /// Contributor role terms (editor, translator, etc.).
    #[serde(default)]
    #[cfg_attr(feature = "schema", schemars(skip))]
    pub roles: HashMap<ContributorRole, ContributorTerm>,
    /// Locator terms (page, chapter, etc.).
    #[serde(default)]
    #[cfg_attr(feature = "schema", schemars(skip))]
    pub locators: HashMap<LocatorType, LocatorTerm>,
    /// General terms (and, et al., etc.).
    #[serde(default)]
    pub terms: Terms,
    /// Whether to place periods/commas inside quotation marks.
    /// true = American style ("text."), false = British style ("text".)
    // NOTE(review): `grammar_options` carries a flag of the same name (both are
    // set to `true` in `en_us()`); confirm which one consumers are meant to read.
    #[serde(default)]
    pub punctuation_in_quote: bool,
    /// Articles to strip from titles when sorting (e.g., "the", "a", "an" for English).
    /// These should be lowercase and will be matched case-insensitively.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub sort_articles: Vec<String>,
    /// Schema version from the source locale file (None = legacy v1).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub locale_schema_version: Option<String>,
    /// Runtime evaluation configuration.
    #[serde(default)]
    pub evaluation: EvaluationConfig,
    /// ICU MF1 messages keyed by message ID (populated for v2 locales).
    // NOTE(review): the text above says "MF1" while the default evaluator is
    // `Mf2MessageEvaluator`; confirm which message syntax these strings use.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub messages: HashMap<String, String>,
    /// Named date format presets: symbolic name → CLDR pattern.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub date_formats: HashMap<String, String>,
    /// Number formatting options.
    #[serde(default)]
    pub number_formats: NumberFormats,
    /// Grammar options.
    #[serde(default)]
    pub grammar_options: GrammarOptions,
    /// Backwards-compatibility aliases: old term key → new message ID.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub legacy_term_aliases: HashMap<String, String>,
    /// Vocabulary maps for genre and medium display text.
    #[serde(default, skip_serializing_if = "VocabMap::is_empty")]
    pub vocab: VocabMap,
    /// Message evaluator implementation (not serialized; set during load).
    // Skipped by serde in both directions; a deserialized `Locale` gets the
    // value returned by `default_evaluator()`.
    #[serde(skip, default = "default_evaluator")]
    #[cfg_attr(feature = "schema", schemars(skip))]
    pub evaluator: Arc<dyn MessageEvaluator>,
}
129
130/// Default message evaluator (MF2).
131fn default_evaluator() -> Arc<dyn MessageEvaluator> {
132    Arc::new(Mf2MessageEvaluator)
133}
134
135impl Default for Locale {
136    fn default() -> Self {
137        Self {
138            locale: String::default(),
139            dates: DateTerms::default(),
140            roles: HashMap::default(),
141            locators: HashMap::default(),
142            terms: Terms::default(),
143            punctuation_in_quote: false,
144            sort_articles: Vec::default(),
145            locale_schema_version: None,
146            evaluation: EvaluationConfig::default(),
147            messages: HashMap::default(),
148            date_formats: HashMap::default(),
149            number_formats: NumberFormats::default(),
150            grammar_options: GrammarOptions::default(),
151            legacy_term_aliases: HashMap::default(),
152            vocab: VocabMap::default(),
153            evaluator: default_evaluator(),
154        }
155    }
156}
157
158impl fmt::Debug for Locale {
159    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
160        f.debug_struct("Locale")
161            .field("locale", &self.locale)
162            .field("dates", &self.dates)
163            .field("roles", &self.roles)
164            .field("locators", &self.locators)
165            .field("terms", &self.terms)
166            .field("punctuation_in_quote", &self.punctuation_in_quote)
167            .field("sort_articles", &self.sort_articles)
168            .field("locale_schema_version", &self.locale_schema_version)
169            .field("evaluation", &self.evaluation)
170            .field("messages", &self.messages)
171            .field("date_formats", &self.date_formats)
172            .field("number_formats", &self.number_formats)
173            .field("grammar_options", &self.grammar_options)
174            .field("legacy_term_aliases", &self.legacy_term_aliases)
175            .field("vocab", &self.vocab)
176            .field("evaluator", &"<MessageEvaluator>")
177            .finish()
178    }
179}
180
/// Turn a kebab-case key into display text.
///
/// Every `-` becomes a space and the very first character is upper-cased
/// (which may expand to several characters, e.g. `ß` → `SS`). All other
/// characters are kept as they are. An empty key yields an empty string.
fn kebab_to_display(key: &str) -> String {
    let spaced = key.replace('-', " ");
    let mut rest = spaced.chars();
    match rest.next() {
        // A leading `-` has already become a space, whose upper-case form is
        // itself, so upper-casing the first character is always safe.
        Some(initial) => initial.to_uppercase().chain(rest).collect(),
        None => String::new(),
    }
}
200
201impl Locale {
    /// Create a new English (US) locale with default terms.
    ///
    /// Date terms, general terms, role terms, locator terms, archive messages
    /// and the genre/medium vocabulary all come from built-in tables. The
    /// locale uses American punctuation-in-quote placement, curly quotation
    /// marks, the serial comma and an en dash as page-range delimiter, and it
    /// evaluates messages with the MF2 evaluator. No schema version, date
    /// format presets or legacy term aliases are set.
    pub fn en_us() -> Self {
        Self {
            locale: "en-US".into(),
            dates: DateTerms::en_us(),
            roles: embedded::en_us_role_terms(),
            locators: embedded::en_us_locator_terms(),
            terms: Terms::en_us(),
            punctuation_in_quote: true,
            sort_articles: vec!["the".into(), "a".into(), "an".into()],
            locale_schema_version: None,
            evaluation: EvaluationConfig {
                message_syntax: MessageSyntax::Mf2,
            },
            messages: embedded::en_us_archive_messages(),
            date_formats: HashMap::new(),
            number_formats: NumberFormats {
                decimal_separator: ".".into(),
                thousands_separator: ",".into(),
                minimum_digits: 1,
            },
            grammar_options: GrammarOptions {
                // Kept in step with the top-level `punctuation_in_quote` above.
                punctuation_in_quote: true,
                nbsp_before_colon: false,
                // U+201C / U+201D: curly double quotes.
                open_quote: "\u{201C}".into(),
                close_quote: "\u{201D}".into(),
                // U+2018 / U+2019: curly single quotes.
                open_inner_quote: "\u{2018}".into(),
                close_inner_quote: "\u{2019}".into(),
                serial_comma: true,
                // U+2013: en dash.
                page_range_delimiter: "\u{2013}".into(),
            },
            legacy_term_aliases: HashMap::new(),
            vocab: embedded::embedded_en_us_vocab().clone(),
            evaluator: Arc::new(Mf2MessageEvaluator),
        }
    }
238
239    /// Strip leading articles from a string for sorting.
240    ///
241    /// Uses locale-specific articles (e.g., "the", "a", "an" for English;
242    /// "der", "die", "das" for German). Falls back to English articles
243    /// if no locale-specific articles are defined.
244    pub fn strip_sort_articles<'a>(&self, s: &'a str) -> &'a str {
245        let s = s.trim();
246
247        // Default English articles
248        const DEFAULT_ARTICLES: &[&str] = &["the", "a", "an"];
249
250        if self.sort_articles.is_empty() {
251            // Use default English articles
252            for article in DEFAULT_ARTICLES {
253                let prefix = format!("{} ", article);
254                if s.to_lowercase().starts_with(&prefix) {
255                    #[allow(
256                        clippy::string_slice,
257                        reason = "prefix is derived from ASCII article"
258                    )]
259                    return &s[prefix.len()..];
260                }
261            }
262        } else {
263            // Use locale-specific articles
264            for article in &self.sort_articles {
265                let prefix = format!("{} ", article);
266                if s.to_lowercase().starts_with(&prefix) {
267                    #[allow(
268                        clippy::string_slice,
269                        reason = "prefix is derived from a defined article"
270                    )]
271                    return &s[prefix.len()..];
272                }
273            }
274        }
275        s
276    }
277
278    /// Look up display text for a genre canonical key.
279    ///
280    /// Falls back to a readable form of the key if no translation found.
281    pub fn lookup_genre(&self, key: &str) -> String {
282        self.vocab
283            .genre
284            .get(key)
285            .cloned()
286            .unwrap_or_else(|| kebab_to_display(key))
287    }
288
289    /// Look up display text for a medium canonical key.
290    ///
291    /// Falls back to a readable form of the key if no translation found.
292    pub fn lookup_medium(&self, key: &str) -> String {
293        self.vocab
294            .medium
295            .get(key)
296            .cloned()
297            .unwrap_or_else(|| kebab_to_display(key))
298    }
299
300    /// Get a contributor role term.
301    fn resolve_gendered_value(
302        value: &MaybeGendered<String>,
303        requested_gender: Option<GrammaticalGender>,
304    ) -> Option<&str> {
305        value
306            .resolve_with_fallback(requested_gender)
307            .map(String::as_str)
308    }
309
310    fn resolve_gendered_value_neutral(value: &MaybeGendered<String>) -> Option<&str> {
311        value.resolve_neutral().map(String::as_str)
312    }
313
314    fn resolve_no_date_value<'a>(
315        value: &'a SimpleTerm,
316        form: &TermForm,
317        requested_gender: Option<GrammaticalGender>,
318    ) -> Option<&'a str> {
319        match requested_gender {
320            Some(GrammaticalGender::Common) => match *form {
321                TermForm::Long => value
322                    .long
323                    .resolve_strict(Some(GrammaticalGender::Common))
324                    .map(String::as_str),
325                TermForm::Short => value
326                    .short
327                    .resolve_strict(Some(GrammaticalGender::Common))
328                    .map(String::as_str)
329                    .filter(|value| !value.is_empty())
330                    .or_else(|| {
331                        value
332                            .long
333                            .resolve_strict(Some(GrammaticalGender::Common))
334                            .map(String::as_str)
335                    }),
336                _ => value
337                    .long
338                    .resolve_strict(Some(GrammaticalGender::Common))
339                    .map(String::as_str),
340            },
341            _ => match *form {
342                TermForm::Long => Self::resolve_gendered_value(&value.long, requested_gender),
343                TermForm::Short => {
344                    Self::resolve_gendered_value(&value.short, requested_gender.clone())
345                        .filter(|value| !value.is_empty())
346                        .or_else(|| Self::resolve_gendered_value(&value.long, requested_gender))
347                }
348                _ => Self::resolve_gendered_value(&value.long, requested_gender),
349            },
350        }
351    }
352
353    /// Get a contributor role term.
354    pub fn role_term(
355        &self,
356        role: &ContributorRole,
357        plural: bool,
358        form: &TermForm,
359        requested_gender: Option<GrammaticalGender>,
360    ) -> Option<&str> {
361        let term = self.roles.get(role)?;
362        let simple = if plural { &term.plural } else { &term.singular };
363        let term_text = match *form {
364            TermForm::Long => Self::resolve_gendered_value(&simple.long, requested_gender),
365            TermForm::Short => {
366                Self::resolve_gendered_value(&simple.short, requested_gender.clone())
367                    .filter(|value| !value.is_empty())
368                    .or_else(|| Self::resolve_gendered_value(&simple.long, requested_gender))
369            }
370            TermForm::Verb => Self::resolve_gendered_value(&term.verb.long, None),
371            TermForm::VerbShort => Self::resolve_gendered_value(&term.verb.short, None)
372                .filter(|value| !value.is_empty())
373                .or_else(|| Self::resolve_gendered_value(&term.verb.long, None)),
374            _ => Self::resolve_gendered_value(&simple.long, requested_gender),
375        };
376
377        match term_text {
378            Some(value) if !value.is_empty() => Some(value),
379            _ => None,
380        }
381    }
382
383    /// Resolve a contributor role term using only neutral/common values.
384    pub fn role_term_neutral(
385        &self,
386        role: &ContributorRole,
387        plural: bool,
388        form: &TermForm,
389    ) -> Option<&str> {
390        let term = self.roles.get(role)?;
391        let simple = if plural { &term.plural } else { &term.singular };
392        let term_text = match *form {
393            TermForm::Long => Self::resolve_gendered_value_neutral(&simple.long),
394            TermForm::Short => Self::resolve_gendered_value_neutral(&simple.short)
395                .filter(|value| !value.is_empty())
396                .or_else(|| Self::resolve_gendered_value_neutral(&simple.long)),
397            TermForm::Verb => Self::resolve_gendered_value(&term.verb.long, None),
398            TermForm::VerbShort => Self::resolve_gendered_value(&term.verb.short, None)
399                .filter(|value| !value.is_empty())
400                .or_else(|| Self::resolve_gendered_value(&term.verb.long, None)),
401            _ => Self::resolve_gendered_value_neutral(&simple.long),
402        };
403
404        match term_text {
405            Some(value) if !value.is_empty() => Some(value),
406            _ => None,
407        }
408    }
409
410    /// Resolve a contributor role term, evaluating MF2 messages when configured.
411    pub fn resolved_role_term(
412        &self,
413        role: &ContributorRole,
414        plural: bool,
415        form: &TermForm,
416        requested_gender: Option<GrammaticalGender>,
417    ) -> Option<String> {
418        if let Some(message_id) = Self::role_message_id(role, form)
419            && let Some(resolved) = self.resolve_message_text(
420                message_id,
421                Some(u64::from(plural) + 1),
422                requested_gender.clone(),
423            )
424        {
425            return Some(resolved);
426        }
427
428        self.role_term(role, plural, form, requested_gender)
429            .map(ToOwned::to_owned)
430    }
431
432    /// Resolve a contributor role term using only neutral/common values.
433    pub fn resolved_role_term_neutral(
434        &self,
435        role: &ContributorRole,
436        plural: bool,
437        form: &TermForm,
438    ) -> Option<String> {
439        if let Some(message_id) = Self::role_message_id(role, form)
440            && let Some(resolved) = self.resolve_message_text(
441                message_id,
442                Some(u64::from(plural) + 1),
443                Some(GrammaticalGender::Common),
444            )
445        {
446            return Some(resolved);
447        }
448
449        self.role_term_neutral(role, plural, form)
450            .map(ToOwned::to_owned)
451    }
452
453    /// Get a locator term.
454    pub fn locator_term(
455        &self,
456        locator: &LocatorType,
457        plural: bool,
458        form: &TermForm,
459        requested_gender: Option<GrammaticalGender>,
460    ) -> Option<&str> {
461        let term = self.locators.get(locator)?;
462        let form_term = match *form {
463            TermForm::Long => &term.long,
464            TermForm::Short => &term.short,
465            TermForm::Symbol => &term.symbol,
466            _ => &term.short, // Fallback
467        };
468
469        if let Some(ft) = form_term {
470            let value = if plural { &ft.plural } else { &ft.singular };
471            Self::resolve_gendered_value(value, requested_gender)
472        } else {
473            None
474        }
475    }
476
477    /// Resolve a locator term, evaluating MF2 messages when configured.
478    pub fn resolved_locator_term(
479        &self,
480        locator: &LocatorType,
481        plural: bool,
482        form: &TermForm,
483        requested_gender: Option<GrammaticalGender>,
484    ) -> Option<String> {
485        if let Some(message_id) = Self::locator_message_id(locator, form)
486            && let Some(resolved) = self.resolve_message_text(
487                message_id,
488                Some(u64::from(plural) + 1),
489                requested_gender.clone(),
490            )
491        {
492            return Some(resolved);
493        }
494
495        self.locator_term(locator, plural, form, requested_gender.clone())
496            .map(ToOwned::to_owned)
497            .or_else(|| {
498                if let LocatorType::Custom(key) = locator {
499                    self.locator_term_any_form(locator, plural, requested_gender)
500                        .map(ToOwned::to_owned)
501                        .or_else(|| Some(key.clone()))
502                } else {
503                    None
504                }
505            })
506    }
507
508    fn locator_term_any_form(
509        &self,
510        locator: &LocatorType,
511        plural: bool,
512        requested_gender: Option<GrammaticalGender>,
513    ) -> Option<&str> {
514        let term = self.locators.get(locator)?;
515        [&term.long, &term.short, &term.symbol]
516            .into_iter()
517            .flatten()
518            .next()
519            .map(|forms| {
520                if plural {
521                    Self::resolve_gendered_value(&forms.plural, requested_gender).unwrap_or("")
522                } else {
523                    Self::resolve_gendered_value(&forms.singular, requested_gender).unwrap_or("")
524                }
525            })
526            .filter(|value| !value.is_empty())
527    }
528
    /// Resolve a general term to a borrowed string.
    ///
    /// Lookup order:
    /// 1. `messages["term.<id>"]`, used only if the message contains no `{`.
    /// 2. The message referenced through `legacy_term_aliases`, under the same
    ///    no-`{` restriction.
    /// 3. The `terms.general` map (skipped for `NoDate`), honoring `form` with
    ///    a short → long fallback when the short text is missing or empty.
    /// 4. Dedicated `terms` fields and, for terms shared with locators
    ///    (volume, issue, page, chapter, section), the singular locator term.
    ///
    /// Steps 1 and 2 return the message text regardless of `form` and
    /// `requested_gender`. A hit in step 3 is final: if the entry exists but
    /// resolves to `None`, step 4 is not consulted.
    pub fn general_term(
        &self,
        term: &GeneralTerm,
        form: &TermForm,
        requested_gender: Option<GrammaticalGender>,
    ) -> Option<&str> {
        // Legacy borrowed lookup path: prefer plain v2 messages first, then
        // alias-backed messages, and finally the v1 term tables.
        let candidate_id = format!("term.{}", Self::general_term_to_message_id(term));
        if let Some(msg) = self.messages.get(&candidate_id) {
            // Only use plain messages here (no ICU variable syntax): a message
            // containing `{` cannot be returned as a borrowed string as-is.
            if !msg.contains('{') {
                return Some(msg.as_str());
            }
        }
        // Check legacy_term_aliases (old term key → message ID → message text)
        let legacy_key = Self::general_term_to_legacy_key(term);
        if let Some(msg_id) = self.legacy_term_aliases.get(legacy_key)
            && let Some(msg) = self.messages.get(msg_id)
            && !msg.contains('{')
        {
            return Some(msg.as_str());
        }

        // Next, the flattened `terms.general` map. `NoDate` is excluded here
        // and handled in the match below via `resolve_no_date_value`.
        if *term != GeneralTerm::NoDate
            && let Some(simple) = self.terms.general.get(term)
        {
            return match *form {
                TermForm::Long => Self::resolve_gendered_value(&simple.long, requested_gender),
                TermForm::Short => {
                    Self::resolve_gendered_value(&simple.short, requested_gender.clone())
                        .filter(|value| !value.is_empty())
                        .or_else(|| Self::resolve_gendered_value(&simple.long, requested_gender))
                }

                _ => Self::resolve_gendered_value(&simple.long, requested_gender),
            };
        }

        // Fallback to specific fields for common terms
        //
        // NOTE(review): for `Of`, `To`, `Here` and `Deposited` the
        // `terms.general` lookup in the arms below repeats the one above, which
        // already returned on a hit, so these arms appear to always yield `None`.
        match term {
            GeneralTerm::And => self.terms.and.as_deref(),
            GeneralTerm::EtAl => self.terms.et_al.as_deref(),
            GeneralTerm::AndOthers => self.terms.and_others.as_deref(),
            GeneralTerm::Accessed => self.terms.accessed.as_deref(),
            GeneralTerm::Ibid => self.terms.ibid.as_deref(),
            GeneralTerm::In => self.terms.in_.as_deref(),
            // `NoDate`: form-aware entry from the general map first, then the
            // single legacy `no_date` string (used for every form).
            GeneralTerm::NoDate => self
                .terms
                .general
                .get(term)
                .and_then(|value| Self::resolve_no_date_value(value, form, requested_gender))
                .or(self.terms.no_date.as_deref()),
            GeneralTerm::Retrieved => self.terms.retrieved.as_deref(),
            GeneralTerm::At => self.terms.at.as_deref(),
            GeneralTerm::By => self.terms.by.as_deref(),
            GeneralTerm::From => self.terms.from.as_deref(),
            GeneralTerm::Of => self
                .terms
                .general
                .get(term)
                .and_then(|value| Self::resolve_gendered_value(&value.long, requested_gender)),
            GeneralTerm::To => self
                .terms
                .general
                .get(term)
                .and_then(|value| Self::resolve_gendered_value(&value.long, requested_gender)),
            GeneralTerm::Anonymous => {
                Self::resolve_gendered_value(&self.terms.anonymous.long, requested_gender)
            }
            GeneralTerm::Circa => {
                Self::resolve_gendered_value(&self.terms.circa.long, requested_gender)
            }
            // Fallback to locators for shared terms (always the singular text)
            GeneralTerm::Volume => {
                self.locator_term(&LocatorType::Volume, false, form, requested_gender)
            }
            GeneralTerm::Issue => {
                self.locator_term(&LocatorType::Issue, false, form, requested_gender)
            }
            GeneralTerm::Page => {
                self.locator_term(&LocatorType::Page, false, form, requested_gender)
            }
            GeneralTerm::Chapter => {
                self.locator_term(&LocatorType::Chapter, false, form, requested_gender)
            }
            GeneralTerm::Section => {
                self.locator_term(&LocatorType::Section, false, form, requested_gender)
            }
            GeneralTerm::Here => self
                .terms
                .general
                .get(term)
                .and_then(|value| Self::resolve_gendered_value(&value.long, requested_gender)),
            GeneralTerm::Deposited => self
                .terms
                .general
                .get(term)
                .and_then(|value| Self::resolve_gendered_value(&value.long, requested_gender)),
            _ => None,
        }
    }
633
634    /// Resolve a general term, evaluating MF2 messages when configured.
635    pub fn resolved_general_term(
636        &self,
637        term: &GeneralTerm,
638        form: &TermForm,
639        requested_gender: Option<GrammaticalGender>,
640    ) -> Option<String> {
641        if let Some(message_id) = Self::general_message_id(term, form)
642            && let Some(resolved) =
643                self.resolve_message_text(message_id, None, requested_gender.clone())
644        {
645            return Some(resolved);
646        }
647
648        self.general_term(term, form, requested_gender)
649            .map(ToOwned::to_owned)
650    }
651
652    /// Resolve an archive hierarchy label, using MF2 messages.
653    /// Returns singular form (count=1) by default.
654    pub fn resolved_archive_term(&self, field: ArchiveHierarchyField) -> Option<String> {
655        self.resolve_message_text(field.message_id(), Some(1), None)
656    }
657
658    /// Get the "and" term based on style preference.
659    pub fn and_term(&self, use_symbol: bool) -> &str {
660        if use_symbol {
661            self.terms.and_symbol.as_deref().unwrap_or("&")
662        } else {
663            self.terms.and.as_deref().unwrap_or("and")
664        }
665    }
666
667    /// Get the "et al." term.
668    pub fn et_al(&self) -> &str {
669        self.terms.et_al.as_deref().unwrap_or("et al.")
670    }
671
672    /// Get a month name.
673    pub fn month_name(&self, month: u8, short: bool) -> &str {
674        let idx = (month.saturating_sub(1)) as usize;
675        if short {
676            self.dates
677                .months
678                .short
679                .get(idx)
680                .map(|s| s.as_str())
681                .unwrap_or("")
682        } else {
683            self.dates
684                .months
685                .long
686                .get(idx)
687                .map(|s| s.as_str())
688                .unwrap_or("")
689        }
690    }
691
692    /// Resolve a `pattern.date-*` message with locale-specific year/month/day
693    /// components.
694    ///
695    /// Returns `Some(rendered)` only when the locale carries an MF2 message
696    /// at `message_id` and the evaluator produces output. Callers fall back
697    /// to the engine's hardcoded English assembly on `None`.
698    ///
699    /// A component is forwarded to the evaluator only when non-empty; an
700    /// authored pattern that references `{$day}` therefore yields `None` if
701    /// the input date carries no day, letting the caller pick a shorter form.
702    ///
703    /// The day argument is taken as `Option<u32>` rather than a pre-formatted
704    /// string so the digit-to-string allocation is deferred until after the
705    /// message lookup succeeds — the common case for legacy locales (`en-US`,
706    /// every v1 file) is the lookup miss, which now incurs zero allocation.
707    pub fn resolve_date_pattern(
708        &self,
709        message_id: &str,
710        year: Option<&str>,
711        month: Option<&str>,
712        day: Option<u32>,
713    ) -> Option<String> {
714        let message = self.messages.get(message_id)?;
715        if self.evaluation.message_syntax == MessageSyntax::Static {
716            return None;
717        }
718
719        let day_str = day.map(|d| d.to_string());
720        let args = MessageArgs {
721            year: year.filter(|s| !s.is_empty()),
722            month: month.filter(|s| !s.is_empty()),
723            day: day_str.as_deref(),
724            ..MessageArgs::default()
725        };
726        self.evaluator.evaluate(message, &args)
727    }
728}
729
730#[cfg(test)]
731#[allow(
732    clippy::unwrap_used,
733    clippy::expect_used,
734    clippy::panic,
735    clippy::indexing_slicing,
736    clippy::todo,
737    clippy::unimplemented,
738    clippy::unreachable,
739    clippy::get_unwrap,
740    reason = "Panicking is acceptable and often desired in tests."
741)]
742mod tests {
743    use super::*;
744
745    #[test]
746    fn test_en_us_locale() {
747        let locale = Locale::en_us();
748        assert_eq!(locale.locale, "en-US");
749        assert_eq!(locale.and_term(false), "and");
750        assert_eq!(locale.and_term(true), "&");
751        assert_eq!(locale.et_al(), "et al.");
752    }
753
754    #[test]
755    fn test_month_names() {
756        let locale = Locale::en_us();
757        assert_eq!(locale.month_name(1, false), "January");
758        assert_eq!(locale.month_name(1, true), "Jan.");
759        assert_eq!(locale.month_name(12, false), "December");
760    }
761
762    #[test]
763    fn test_role_terms() {
764        let locale = Locale::en_us();
765
766        assert_eq!(
767            locale.role_term(&ContributorRole::Editor, false, &TermForm::Short, None),
768            Some("ed.")
769        );
770        assert_eq!(
771            locale.role_term(&ContributorRole::Editor, true, &TermForm::Short, None),
772            Some("eds.")
773        );
774        assert_eq!(
775            locale.role_term(&ContributorRole::Translator, false, &TermForm::Verb, None),
776            Some("translated by")
777        );
778    }
779
780    #[test]
781    fn test_no_date_term_resolves_long_and_short_forms() {
782        let locale = Locale::en_us();
783
784        assert_eq!(
785            locale.general_term(&GeneralTerm::NoDate, &TermForm::Long, None),
786            Some("no date")
787        );
788        assert_eq!(
789            locale.general_term(&GeneralTerm::NoDate, &TermForm::Short, None),
790            Some("n.d.")
791        );
792    }
793
794    #[test]
795    fn test_no_date_term_falls_back_to_legacy_short_form() {
796        let mut locale = Locale::default();
797        locale.terms.no_date = Some("n.d.".to_string());
798
799        assert_eq!(
800            locale.general_term(&GeneralTerm::NoDate, &TermForm::Short, None),
801            Some("n.d.")
802        );
803        assert_eq!(
804            locale.general_term(&GeneralTerm::NoDate, &TermForm::Long, None),
805            Some("n.d.")
806        );
807    }
808
809    #[test]
810    fn test_locale_deserialization() {
811        let json = r#"{
812            "locale": "en-US",
813            "dates": {
814                "months": {
815                    "long": ["January", "February", "March", "April", "May", "June",
816                             "July", "August", "September", "October", "November", "December"],
817                    "short": ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
818                              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
819                },
820                "seasons": ["Spring", "Summer", "Autumn", "Winter"]
821            },
822            "roles": {},
823            "terms": {
824                "and": "and",
825                "et-al": "et al."
826            }
827        }"#;
828
829        let locale: Locale = serde_json::from_str(json).unwrap();
830        assert_eq!(locale.locale, "en-US");
831        assert_eq!(locale.dates.months.long[0], "January");
832        assert_eq!(locale.terms.and.as_ref().unwrap(), "and");
833    }
834
835    #[test]
836    fn test_yaml_locale_loading() {
837        let yaml = r#"
838locale: de-DE
839dates:
840  months:
841    long:
842      - Januar
843      - Februar
844      - März
845      - April
846      - Mai
847      - Juni
848      - Juli
849      - August
850      - September
851      - Oktober
852      - November
853      - Dezember
854    short:
855      - Jan.
856      - Feb.
857      - März
858      - Apr.
859      - Mai
860      - Juni
861      - Juli
862      - Aug.
863      - Sep.
864      - Okt.
865      - Nov.
866      - Dez.
867  seasons:
868    - Frühling
869    - Sommer
870    - Herbst
871    - Winter
872terms:
873  and:
874    long: und
875    symbol: "&"
876  et_al:
877    long: "u. a."
878"#;
879
880        let locale = Locale::from_yaml_str(yaml).unwrap();
881        assert_eq!(locale.locale, "de-DE");
882        assert_eq!(locale.and_term(false), "und");
883        assert_eq!(locale.et_al(), "u. a.");
884        assert_eq!(locale.month_name(1, false), "Januar");
885        assert_eq!(locale.month_name(3, false), "März");
886    }
887
888    #[test]
889    fn test_yaml_no_date_term_preserves_long_and_short_forms() {
890        let yaml = r#"
891locale: en-US
892dates:
893  months:
894    long: [January, February, March, April, May, June, July, August, September, October, November, December]
895    short: [Jan., Feb., Mar., Apr., May, June, July, Aug., Sept., Oct., Nov., Dec.]
896  seasons: [Spring, Summer, Autumn, Winter]
897roles: {}
898terms:
899  no date:
900    long: no date
901    short: n.d.
902"#;
903
904        let locale = Locale::from_yaml_str(yaml).unwrap();
905        assert_eq!(
906            locale.general_term(&GeneralTerm::NoDate, &TermForm::Long, None),
907            Some("no date")
908        );
909        assert_eq!(
910            locale.general_term(&GeneralTerm::NoDate, &TermForm::Short, None),
911            Some("n.d.")
912        );
913        assert_eq!(locale.terms.no_date.as_deref(), Some("n.d."));
914    }
915
916    /// v2 locale with grammar-options overrides punctuation_in_quote correctly.
917    #[test]
918    fn test_v2_grammar_options_sync_punctuation_in_quote() {
919        let yaml = r#"
920locale-schema-version: "2"
921locale: en-GB
922grammar-options:
923  punctuation-in-quote: false
924"#;
925        let locale = Locale::from_yaml_str(yaml).unwrap();
926        // grammar_options is the authoritative source for v2 locales
927        assert!(!locale.grammar_options.punctuation_in_quote);
928        // legacy field is synced from grammar_options
929        assert!(!locale.punctuation_in_quote);
930    }
931
932    /// v1 locale (no grammar-options) derives punctuation_in_quote from locale ID.
933    #[test]
934    fn test_v1_locale_derives_punctuation_from_locale_id() {
935        let yaml = r#"
936locale: en-US
937"#;
938        let locale = Locale::from_yaml_str(yaml).unwrap();
939        // en-US uses American style (inside)
940        assert!(locale.punctuation_in_quote);
941        assert!(locale.grammar_options.punctuation_in_quote);
942    }
943
944    /// apply_override merges messages key-by-key into the base locale.
945    #[test]
946    fn test_apply_override_merges_messages() {
947        let mut locale = Locale::en_us();
948        locale
949            .messages
950            .insert("term.page-label".into(), "p.".into());
951        let ov = LocaleOverride {
952            messages: [("term.page-label".into(), "pg.".into())].into(),
953            ..Default::default()
954        };
955        locale.apply_override(&ov);
956        assert_eq!(
957            locale.messages.get("term.page-label").map(|s| s.as_str()),
958            Some("pg.")
959        );
960    }
961
962    /// apply_override with grammar_options replaces block and syncs punctuation_in_quote.
963    #[test]
964    fn test_apply_override_grammar_options_syncs_punctuation() {
965        let mut locale = Locale::en_us();
966        locale.punctuation_in_quote = false;
967        let ov = LocaleOverride {
968            grammar_options: Some(GrammarOptions {
969                punctuation_in_quote: true,
970                ..Default::default()
971            }),
972            ..Default::default()
973        };
974        locale.apply_override(&ov);
975        assert!(locale.punctuation_in_quote);
976        assert!(locale.grammar_options.punctuation_in_quote);
977    }
978
979    #[test]
980    fn test_resolved_locator_term_evaluates_plural_message() {
981        let locale = Locale::en_us();
982
983        assert_eq!(
984            locale.resolved_locator_term(&LocatorType::Page, false, &TermForm::Short, None),
985            Some("p.".to_string())
986        );
987        assert_eq!(
988            locale.resolved_locator_term(&LocatorType::Page, true, &TermForm::Short, None),
989            Some("pp.".to_string())
990        );
991    }
992
993    #[test]
994    fn test_resolved_locator_term_falls_back_to_custom_locale_form_then_raw_key() {
995        let locale = Locale::from_yaml_str(
996            r#"
997locale: en-US
998locators:
999  reel:
1000    long:
1001      singular: "reel"
1002      plural: "reels"
1003"#,
1004        )
1005        .expect("custom locale should parse");
1006
1007        assert_eq!(
1008            locale.resolved_locator_term(
1009                &LocatorType::Custom("reel".to_string()),
1010                false,
1011                &TermForm::Short,
1012                None,
1013            ),
1014            Some("reel".to_string())
1015        );
1016        assert_eq!(
1017            locale.resolved_locator_term(
1018                &LocatorType::Custom("movement".to_string()),
1019                false,
1020                &TermForm::Short,
1021                None,
1022            ),
1023            Some("movement".to_string())
1024        );
1025    }
1026
1027    #[test]
1028    fn test_legacy_locator_terms_under_terms_still_populate_locators() {
1029        let locale = Locale::from_yaml_str(
1030            r#"
1031locale: en-US
1032terms:
1033  page:
1034    short:
1035      singular: "pg."
1036      plural: "pgs."
1037"#,
1038        )
1039        .expect("legacy locator terms should parse");
1040
1041        assert_eq!(
1042            locale.resolved_locator_term(&LocatorType::Page, false, &TermForm::Short, None),
1043            Some("pg.".to_string())
1044        );
1045    }
1046
1047    #[test]
1048    fn test_explicit_locators_override_legacy_terms_for_builtin_keys() {
1049        let locale = Locale::from_yaml_str(
1050            r#"
1051locale: en-US
1052terms:
1053  page:
1054    short:
1055      singular: "pg."
1056      plural: "pgs."
1057locators:
1058  page:
1059    short:
1060      singular: "p."
1061      plural: "pp."
1062"#,
1063        )
1064        .expect("mixed locator forms should parse");
1065
1066        assert_eq!(
1067            locale.resolved_locator_term(&LocatorType::Page, false, &TermForm::Short, None),
1068            Some("p.".to_string())
1069        );
1070    }
1071
1072    #[test]
1073    fn test_non_locator_terms_are_not_reclassified_as_custom_locators() {
1074        let locale = Locale::from_yaml_str(
1075            r#"
1076locale: en-US
1077terms:
1078  and:
1079    long: "und"
1080"#,
1081        )
1082        .expect("general terms should parse");
1083
1084        assert_eq!(locale.terms.and.as_deref(), Some("und"));
1085        assert!(
1086            !locale
1087                .locators
1088                .contains_key(&LocatorType::Custom("and".to_string()))
1089        );
1090    }
1091
1092    #[test]
1093    fn test_resolved_role_term_evaluates_plural_message() {
1094        let locale = Locale::en_us();
1095
1096        assert_eq!(
1097            locale.resolved_role_term(&ContributorRole::Editor, false, &TermForm::Long, None),
1098            Some("editor".to_string())
1099        );
1100        assert_eq!(
1101            locale.resolved_role_term(&ContributorRole::Editor, true, &TermForm::Long, None),
1102            Some("editors".to_string())
1103        );
1104    }
1105
1106    #[test]
1107    fn test_role_term_prefers_common_form_for_mixed_gender_requests() {
1108        let locale = Locale::from_yaml_str(
1109            r#"
1110locale: es-ES
1111roles:
1112  editor:
1113    long:
1114      singular:
1115        masculine: editor
1116        feminine: editora
1117        common: persona editora
1118      plural:
1119        masculine: editores
1120        feminine: editoras
1121        common: equipo editorial
1122    short:
1123      singular: ed.
1124      plural: eds.
1125    verb: editado por
1126"#,
1127        )
1128        .expect("gendered locale should parse");
1129
1130        assert_eq!(
1131            locale.role_term(
1132                &ContributorRole::Editor,
1133                false,
1134                &TermForm::Long,
1135                Some(GrammaticalGender::Feminine),
1136            ),
1137            Some("editora")
1138        );
1139        assert_eq!(
1140            locale.role_term(
1141                &ContributorRole::Editor,
1142                true,
1143                &TermForm::Long,
1144                Some(GrammaticalGender::Common),
1145            ),
1146            Some("equipo editorial")
1147        );
1148    }
1149
1150    #[test]
1151    fn test_no_date_term_falls_back_when_requested_gender_has_no_matching_slot() {
1152        let locale = Locale::from_yaml_str(
1153            r#"
1154locale: es-ES
1155terms:
1156  no date:
1157    long:
1158      masculine: sin fecha
1159  no_date: s. f.
1160"#,
1161        )
1162        .expect("locale should parse");
1163
1164        assert_eq!(
1165            locale.general_term(
1166                &GeneralTerm::NoDate,
1167                &TermForm::Long,
1168                Some(GrammaticalGender::Common),
1169            ),
1170            Some("s. f.")
1171        );
1172    }
1173
1174    #[test]
1175    fn test_es_es_locale_is_embedded() {
1176        let bytes = crate::embedded::get_locale_bytes("es-ES").expect("es-ES should be embedded");
1177        let yaml = std::str::from_utf8(bytes).expect("embedded locale should be utf-8");
1178        let locale = Locale::from_yaml_str(yaml).expect("embedded es-ES should parse");
1179
1180        assert_eq!(locale.locale, "es-ES");
1181        assert_eq!(
1182            locale.resolved_role_term(
1183                &ContributorRole::Editor,
1184                false,
1185                &TermForm::Long,
1186                Some(GrammaticalGender::Feminine),
1187            ),
1188            Some("editora".to_string())
1189        );
1190    }
1191
1192    #[test]
1193    fn embedded_locale_ids_include_all_bundled_locale_files() {
1194        for id in [
1195            "en-US", "ar-AR", "de-DE", "es-ES", "eu-ES", "fr-FR", "tr-TR",
1196        ] {
1197            assert!(
1198                crate::embedded::EMBEDDED_LOCALE_IDS.contains(&id),
1199                "{id} should be listed as an embedded locale"
1200            );
1201        }
1202    }
1203
1204    #[test]
1205    fn bundled_ar_ar_and_eu_es_locales_are_embedded_and_parseable() {
1206        for id in ["ar-AR", "eu-ES"] {
1207            let bytes = crate::embedded::get_locale_bytes(id).expect("locale should be embedded");
1208            let yaml = std::str::from_utf8(bytes).expect("embedded locale should be utf-8");
1209            let locale = Locale::from_yaml_str(yaml).expect("embedded locale should parse");
1210
1211            assert_eq!(locale.locale, id);
1212        }
1213    }
1214
1215    #[test]
1216    fn test_es_es_role_term_resolves_gendered_mf2_message() {
1217        let bytes = crate::embedded::get_locale_bytes("es-ES").expect("es-ES should be embedded");
1218        let yaml = std::str::from_utf8(bytes).expect("embedded locale should be utf-8");
1219        let locale = Locale::from_yaml_str(yaml).expect("embedded es-ES should parse");
1220
1221        assert_eq!(
1222            locale.resolved_role_term(
1223                &ContributorRole::Editor,
1224                true,
1225                &TermForm::Long,
1226                Some(GrammaticalGender::Masculine),
1227            ),
1228            Some("editores".to_string())
1229        );
1230        assert_eq!(
1231            locale.resolved_role_term(
1232                &ContributorRole::Translator,
1233                true,
1234                &TermForm::Long,
1235                Some(GrammaticalGender::Feminine),
1236            ),
1237            Some("traductoras".to_string())
1238        );
1239        assert_eq!(
1240            locale.resolved_role_term_neutral(&ContributorRole::Editor, true, &TermForm::Long),
1241            Some("equipo editorial".to_string())
1242        );
1243    }
1244
1245    #[test]
1246    fn test_role_term_falls_back_when_mf2_message_cannot_evaluate() {
1247        let locale = Locale::from_yaml_str(
1248            r#"
1249locale: es-ES
1250evaluation:
1251  message-syntax: mf2
1252messages:
1253  role.editor.label-long: |
1254    .match {$gender :unknown} {$count :plural}
1255    when feminine one {editora}
1256roles:
1257  editor:
1258    long:
1259      singular:
1260        feminine: editora heredada
1261      plural:
1262        feminine: editoras heredadas
1263"#,
1264        )
1265        .expect("locale should parse");
1266
1267        assert_eq!(
1268            locale.resolved_role_term(
1269                &ContributorRole::Editor,
1270                false,
1271                &TermForm::Long,
1272                Some(GrammaticalGender::Feminine),
1273            ),
1274            Some("editora heredada".to_string())
1275        );
1276    }
1277
1278    #[test]
1279    fn test_lookup_genre_known_key() {
1280        let locale = Locale::from_yaml_str(
1281            r#"
1282locale: en-US
1283vocab:
1284  genre:
1285    phd-thesis: "PhD thesis"
1286"#,
1287        )
1288        .unwrap();
1289        assert_eq!(locale.lookup_genre("phd-thesis"), "PhD thesis");
1290    }
1291
1292    #[test]
1293    fn test_lookup_medium_known_key() {
1294        let locale = Locale::from_yaml_str(
1295            r#"
1296locale: en-US
1297vocab:
1298  medium:
1299    television: "Television"
1300"#,
1301        )
1302        .unwrap();
1303        assert_eq!(locale.lookup_medium("television"), "Television");
1304    }
1305
1306    #[test]
1307    fn test_lookup_genre_fallback() {
1308        let locale = Locale::en_us();
1309        // Unknown key → title-case first word + spaces
1310        assert_eq!(locale.lookup_genre("unknown-key"), "Unknown key");
1311    }
1312
1313    #[test]
1314    fn test_en_us_locale_uses_embedded_vocab() {
1315        let locale = Locale::en_us();
1316
1317        assert_eq!(locale.lookup_genre("phd-thesis"), "PhD thesis");
1318        assert_eq!(locale.lookup_medium("audio-cd"), "Audio CD");
1319    }
1320
1321    #[test]
1322    fn test_from_yaml_str_inherits_embedded_vocab_defaults() {
1323        let locale = Locale::from_yaml_str("locale: en-US\n").unwrap();
1324
1325        assert_eq!(locale.lookup_genre("phd-thesis"), "PhD thesis");
1326    }
1327
1328    #[test]
1329    fn test_partial_genre_vocab_override_preserves_medium_defaults() {
1330        let locale = Locale::from_yaml_str(
1331            r#"
1332locale: en-US
1333vocab:
1334  genre:
1335    phd-thesis: "Doctoral dissertation"
1336"#,
1337        )
1338        .unwrap();
1339
1340        assert_eq!(locale.lookup_genre("phd-thesis"), "Doctoral dissertation");
1341        assert_eq!(locale.lookup_medium("audio-cd"), "Audio CD");
1342    }
1343
1344    #[test]
1345    fn test_partial_medium_vocab_override_preserves_genre_defaults() {
1346        let locale = Locale::from_yaml_str(
1347            r#"
1348locale: en-US
1349vocab:
1350  medium:
1351    television: "Broadcast television"
1352"#,
1353        )
1354        .unwrap();
1355
1356        assert_eq!(locale.lookup_medium("television"), "Broadcast television");
1357        assert_eq!(locale.lookup_genre("phd-thesis"), "PhD thesis");
1358    }
1359
1360    #[test]
1361    fn test_kebab_to_display_single_word() {
1362        assert_eq!(kebab_to_display("video"), "Video");
1363    }
1364
1365    #[test]
1366    fn test_kebab_to_display_multiple_words() {
1367        assert_eq!(kebab_to_display("phd-thesis"), "Phd thesis");
1368        assert_eq!(kebab_to_display("audio-cd"), "Audio cd");
1369    }
1370}