// citum_schema_style/locale/mod.rs

1/*
2SPDX-License-Identifier: MIT OR Apache-2.0
3SPDX-FileCopyrightText: © 2023-2026 Bruce D'Arcus
4*/
5
6//! Locale definitions for Citum.
7//!
8//! Locales provide language-specific terms, date formats, and punctuation rules
9//! for citation formatting.
10
11/// Locator text normalization.
12pub mod locator;
13/// Message evaluation for parameterized locale strings.
14pub mod message;
15/// Raw locale types used during locale file parsing.
16pub mod raw;
17/// Structured locale types used by the processor.
18pub mod types;
19
20use crate::citation::LocatorType;
21use crate::template::ContributorRole;
22pub use message::{MessageArgs, MessageEvaluator, Mf2MessageEvaluator};
23pub use raw::{RawLocale, RawTermValue};
24#[cfg(feature = "schema")]
25use schemars::JsonSchema;
26use serde::{Deserialize, Serialize};
27use std::collections::HashMap;
28use std::fmt;
29use std::sync::{Arc, OnceLock};
30pub use types::*;
31
32/// A list of month names (12 elements for Jan-Dec).
33pub type MonthList = Vec<String>;
34
/// Names one level of the archive hierarchy whose locale label can be looked up.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArchiveHierarchyField {
    /// Named collection or record group.
    Collection,
    /// Named series or sub-collection.
    Series,
    /// Box or container designation.
    Box,
    /// Folder designation.
    Folder,
    /// Item, file, or reference-code designation.
    Item,
}

impl ArchiveHierarchyField {
    /// Maps this field to the message ID under which its label is stored.
    ///
    /// The match is exhaustive on purpose: adding a variant without an ID is a
    /// compile error rather than a silent fallback.
    fn message_id(self) -> &'static str {
        match self {
            Self::Item => "term.archive-item-label",
            Self::Folder => "term.archive-folder-label",
            Self::Box => "term.archive-box-label",
            Self::Series => "term.archive-series-label",
            Self::Collection => "term.archive-collection-label",
        }
    }
}
62
/// A locale definition containing language-specific terms and formatting rules.
///
/// The `evaluator` field holds the message evaluation engine, selected based on
/// `evaluation.message_syntax`. This allows for trait-based swapping to ICU4X
/// implementations in the future without changing call sites.
// `Debug` is not derived: it is implemented by hand in `impl fmt::Debug for Locale`,
// which prints a fixed placeholder for `evaluator`.
#[derive(Clone, Deserialize, Serialize)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
// Serialized field names are kebab-case (e.g. `punctuation-in-quote`).
#[serde(rename_all = "kebab-case")]
pub struct Locale {
    /// The locale identifier (e.g., "en-US", "de-DE").
    // `schemars(skip)` leaves this field out of the generated JSON schema.
    #[cfg_attr(feature = "schema", schemars(skip))]
    pub locale: String,
    /// Date-related terms (months, seasons).
    #[serde(default)]
    pub dates: DateTerms,
    /// Contributor role terms (editor, translator, etc.).
    #[serde(default)]
    #[cfg_attr(feature = "schema", schemars(skip))]
    pub roles: HashMap<ContributorRole, ContributorTerm>,
    /// Locator terms (page, chapter, etc.).
    #[serde(default)]
    #[cfg_attr(feature = "schema", schemars(skip))]
    pub locators: HashMap<LocatorType, LocatorTerm>,
    /// General terms (and, et al., etc.).
    #[serde(default)]
    pub terms: Terms,
    /// Whether to place periods/commas inside quotation marks.
    /// true = American style ("text."), false = British style ("text".)
    // NOTE(review): `GrammarOptions` carries a field of the same name (see `Locale::en_us`);
    // confirm which of the two callers are expected to read.
    #[serde(default)]
    pub punctuation_in_quote: bool,
    /// Articles to strip from titles when sorting (e.g., "the", "a", "an" for English).
    /// These should be lowercase and will be matched case-insensitively.
    // An empty list is omitted on serialization.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub sort_articles: Vec<String>,
    /// Schema version from the source locale file (None = legacy v1).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub locale_schema_version: Option<String>,
    /// Runtime evaluation configuration.
    #[serde(default)]
    pub evaluation: EvaluationConfig,
    /// ICU MF1 messages keyed by message ID (populated for v2 locales).
    // NOTE(review): the doc above says "MF1", but `default_evaluator` installs
    // `Mf2MessageEvaluator` and `en_us_archive_messages` seeds `.match`-style messages —
    // confirm which message syntax is meant here.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub messages: HashMap<String, String>,
    /// Named date format presets: symbolic name → CLDR pattern.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub date_formats: HashMap<String, String>,
    /// Number formatting options.
    #[serde(default)]
    pub number_formats: NumberFormats,
    /// Grammar options.
    #[serde(default)]
    pub grammar_options: GrammarOptions,
    /// Backwards-compatibility aliases: old term key → new message ID.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub legacy_term_aliases: HashMap<String, String>,
    /// Vocabulary maps for genre and medium display text.
    #[serde(default, skip_serializing_if = "VocabMap::is_empty")]
    pub vocab: VocabMap,
    /// Message evaluator implementation (not serialized; set during load).
    // serde skips this field in both directions; on deserialization it is filled in by
    // calling `default_evaluator`.
    #[serde(skip, default = "default_evaluator")]
    #[cfg_attr(feature = "schema", schemars(skip))]
    pub evaluator: Arc<dyn MessageEvaluator>,
}
126
127/// Default message evaluator (MF2).
128fn default_evaluator() -> Arc<dyn MessageEvaluator> {
129    Arc::new(Mf2MessageEvaluator)
130}
131
/// Deserialization target holding only the `vocab` section of a locale document.
///
/// Used by `embedded_en_us_vocab` to parse the text returned by
/// `extract_top_level_yaml_section`.
#[derive(Deserialize)]
struct EmbeddedVocabDocument {
    // `default` lets a document without a `vocab` key deserialize to `None`.
    #[serde(default)]
    vocab: Option<raw::RawVocab>,
}
137
/// Extract one top-level YAML section while preserving its nested indentation.
///
/// Scans `yaml` line by line for an unindented `key:` header and returns that header
/// together with every following line up to, but not including, the next top-level
/// entry. Lines are joined with `\n`; a trailing `\r` on any line is dropped, so CRLF
/// input yields LF-joined output.
///
/// The header may be followed by trailing whitespace or by whitespace and a `#` comment.
/// Unindented `#` comment lines inside the section are skipped instead of being treated
/// as the next entry, because a column-zero comment does not end a YAML mapping.
///
/// Returns `None` when no matching header line exists.
fn extract_top_level_yaml_section(yaml: &str, key: &str) -> Option<String> {
    let header = format!("{key}:");
    // A header is `key:` followed by nothing, by whitespace only, or by whitespace and a
    // comment. `key:#x` is not a header: a YAML comment needs whitespace before `#`.
    let is_header = |line: &str| {
        line.strip_prefix(header.as_str()).is_some_and(|rest| {
            let after_space = rest.trim_start();
            after_space.is_empty()
                || (after_space.len() < rest.len() && after_space.starts_with('#'))
        })
    };

    let mut lines = yaml.lines().map(|line| line.trim_end_matches('\r'));
    let first = lines.find(|&line| is_header(line))?;

    let mut collected = vec![first];
    for line in lines {
        let is_top_level = !line.is_empty() && !line.starts_with([' ', '\t']);
        if is_top_level {
            if line.starts_with('#') {
                // Column-zero comment: belongs to no entry, so keep scanning.
                continue;
            }
            break;
        }
        collected.push(line);
    }

    Some(collected.join("\n"))
}
169
/// Archive hierarchy label messages for the hardcoded en-US locale.
///
/// Only the five archive labels are seeded. Any other message ID is absent from the
/// returned map, so lookups for it fall through to the legacy typed term maps and the
/// hardcoded `en_us()` constructor stays consistent with the pre-existing test baseline.
fn en_us_archive_messages() -> HashMap<String, String> {
    // (message ID, message source) pairs; box/folder/item select on `$count`.
    const SEEDED: [(&str, &str); 5] = [
        ("term.archive-collection-label", "collection"),
        ("term.archive-series-label", "series"),
        (
            "term.archive-box-label",
            ".match {$count :plural}\nwhen one {box}\nwhen * {boxes}",
        ),
        (
            "term.archive-folder-label",
            ".match {$count :plural}\nwhen one {folder}\nwhen * {folders}",
        ),
        (
            "term.archive-item-label",
            ".match {$count :plural}\nwhen one {item}\nwhen * {items}",
        ),
    ];

    SEEDED
        .iter()
        .map(|&(id, source)| (id.to_owned(), source.to_owned()))
        .collect()
}
194
195/// Curated en-US genre and medium labels from the embedded locale asset.
196fn embedded_en_us_vocab() -> &'static VocabMap {
197    static EN_US_VOCAB: OnceLock<VocabMap> = OnceLock::new();
198
199    EN_US_VOCAB.get_or_init(|| {
200        crate::embedded::get_locale_bytes("en-US")
201            .and_then(|bytes| std::str::from_utf8(bytes).ok())
202            .and_then(|yaml| extract_top_level_yaml_section(yaml, "vocab"))
203            .and_then(|vocab_yaml| serde_yaml::from_str::<EmbeddedVocabDocument>(&vocab_yaml).ok())
204            .and_then(|document| document.vocab)
205            .map(|document| VocabMap {
206                genre: document.genre,
207                medium: document.medium,
208            })
209            .unwrap_or_default()
210    })
211}
212
213impl Default for Locale {
214    fn default() -> Self {
215        Self {
216            locale: String::default(),
217            dates: DateTerms::default(),
218            roles: HashMap::default(),
219            locators: HashMap::default(),
220            terms: Terms::default(),
221            punctuation_in_quote: false,
222            sort_articles: Vec::default(),
223            locale_schema_version: None,
224            evaluation: EvaluationConfig::default(),
225            messages: HashMap::default(),
226            date_formats: HashMap::default(),
227            number_formats: NumberFormats::default(),
228            grammar_options: GrammarOptions::default(),
229            legacy_term_aliases: HashMap::default(),
230            vocab: VocabMap::default(),
231            evaluator: default_evaluator(),
232        }
233    }
234}
235
236impl fmt::Debug for Locale {
237    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
238        f.debug_struct("Locale")
239            .field("locale", &self.locale)
240            .field("dates", &self.dates)
241            .field("roles", &self.roles)
242            .field("locators", &self.locators)
243            .field("terms", &self.terms)
244            .field("punctuation_in_quote", &self.punctuation_in_quote)
245            .field("sort_articles", &self.sort_articles)
246            .field("locale_schema_version", &self.locale_schema_version)
247            .field("evaluation", &self.evaluation)
248            .field("messages", &self.messages)
249            .field("date_formats", &self.date_formats)
250            .field("number_formats", &self.number_formats)
251            .field("grammar_options", &self.grammar_options)
252            .field("legacy_term_aliases", &self.legacy_term_aliases)
253            .field("vocab", &self.vocab)
254            .field("evaluator", &"<MessageEvaluator>")
255            .finish()
256    }
257}
258
259/// Extract English (US) role terms.
260fn en_us_role_terms() -> HashMap<ContributorRole, ContributorTerm> {
261    let mut roles = HashMap::new();
262
263    roles.insert(
264        ContributorRole::Editor,
265        ContributorTerm {
266            singular: SimpleTerm {
267                long: "editor".into(),
268                short: "ed.".into(),
269            },
270            plural: SimpleTerm {
271                long: "editors".into(),
272                short: "eds.".into(),
273            },
274            verb: SimpleTerm {
275                long: "edited by".into(),
276                short: "ed.".into(),
277            },
278        },
279    );
280
281    roles.insert(
282        ContributorRole::Translator,
283        ContributorTerm {
284            singular: SimpleTerm {
285                long: "translator".into(),
286                short: "Trans.".into(),
287            },
288            plural: SimpleTerm {
289                long: "translators".into(),
290                short: "Trans.".into(),
291            },
292            verb: SimpleTerm {
293                long: "translated by".into(),
294                short: "Trans.".into(),
295            },
296        },
297    );
298
299    roles.insert(
300        ContributorRole::Director,
301        ContributorTerm {
302            singular: SimpleTerm {
303                long: "director".into(),
304                short: "Dir.".into(),
305            },
306            plural: SimpleTerm {
307                long: "directors".into(),
308                short: "dirs.".into(),
309            },
310            verb: SimpleTerm {
311                long: "directed by".into(),
312                short: "dir.".into(),
313            },
314        },
315    );
316
317    roles.insert(
318        ContributorRole::Interviewer,
319        ContributorTerm {
320            singular: SimpleTerm {
321                long: "Interviewer".into(),
322                short: "Interviewer".into(),
323            },
324            plural: SimpleTerm {
325                long: "Interviewers".into(),
326                short: "Interviewers".into(),
327            },
328            verb: SimpleTerm {
329                long: "interviewed by".into(),
330                short: "interviewed by".into(),
331            },
332        },
333    );
334
335    roles
336}
337
338/// Extract English (US) locator terms.
339fn en_us_locator_terms() -> HashMap<LocatorType, LocatorTerm> {
340    let mut locators = HashMap::new();
341    locators.insert(
342        LocatorType::Page,
343        LocatorTerm {
344            long: Some(SingularPlural {
345                singular: "page".into(),
346                plural: "pages".into(),
347            }),
348            short: Some(SingularPlural {
349                singular: "p.".into(),
350                plural: "pp.".into(),
351            }),
352            symbol: None,
353            gender: None,
354        },
355    );
356
357    locators.insert(
358        LocatorType::Chapter,
359        LocatorTerm {
360            long: Some(SingularPlural {
361                singular: "chapter".into(),
362                plural: "chapters".into(),
363            }),
364            short: Some(SingularPlural {
365                singular: "ch.".into(),
366                plural: "chs.".into(),
367            }),
368            symbol: None,
369            gender: None,
370        },
371    );
372
373    locators.insert(
374        LocatorType::Volume,
375        LocatorTerm {
376            long: Some(SingularPlural {
377                singular: "volume".into(),
378                plural: "volumes".into(),
379            }),
380            short: Some(SingularPlural {
381                singular: "vol.".into(),
382                plural: "vols.".into(),
383            }),
384            symbol: None,
385            gender: None,
386        },
387    );
388
389    locators.insert(
390        LocatorType::Section,
391        LocatorTerm {
392            long: Some(SingularPlural {
393                singular: "section".into(),
394                plural: "sections".into(),
395            }),
396            short: Some(SingularPlural {
397                singular: "sec.".into(),
398                plural: "secs.".into(),
399            }),
400            symbol: Some(SingularPlural {
401                singular: "§".into(),
402                plural: "§§".into(),
403            }),
404            gender: None,
405        },
406    );
407
408    locators.insert(
409        LocatorType::Part,
410        LocatorTerm {
411            long: Some(SingularPlural {
412                singular: "part".into(),
413                plural: "parts".into(),
414            }),
415            short: Some(SingularPlural {
416                singular: "pt.".into(),
417                plural: "pts.".into(),
418            }),
419            symbol: None,
420            gender: None,
421        },
422    );
423
424    locators.insert(
425        LocatorType::Supplement,
426        LocatorTerm {
427            long: Some(SingularPlural {
428                singular: "supplement".into(),
429                plural: "supplements".into(),
430            }),
431            short: Some(SingularPlural {
432                singular: "suppl.".into(),
433                plural: "suppls.".into(),
434            }),
435            symbol: None,
436            gender: None,
437        },
438    );
439
440    locators
441}
442
/// Turn a kebab-case key into display text.
///
/// Every `-` becomes a space and the very first character is uppercased; nothing else
/// is changed. An empty key yields an empty string.
fn kebab_to_display(key: &str) -> String {
    let mut spaced = key.chars().map(|c| if c == '-' { ' ' } else { c });
    match spaced.next() {
        // Uppercasing may expand to several characters (e.g. "ß" becomes "SS").
        Some(initial) => initial.to_uppercase().chain(spaced).collect(),
        None => String::new(),
    }
}
462
463impl Locale {
464    /// Create a new English (US) locale with default terms.
465    pub fn en_us() -> Self {
466        Self {
467            locale: "en-US".into(),
468            dates: DateTerms::en_us(),
469            roles: en_us_role_terms(),
470            locators: en_us_locator_terms(),
471            terms: Terms::en_us(),
472            punctuation_in_quote: true,
473            sort_articles: vec!["the".into(), "a".into(), "an".into()],
474            locale_schema_version: None,
475            evaluation: EvaluationConfig {
476                message_syntax: MessageSyntax::Mf2,
477            },
478            messages: en_us_archive_messages(),
479            date_formats: HashMap::new(),
480            number_formats: NumberFormats {
481                decimal_separator: ".".into(),
482                thousands_separator: ",".into(),
483                minimum_digits: 1,
484            },
485            grammar_options: GrammarOptions {
486                punctuation_in_quote: true,
487                nbsp_before_colon: false,
488                open_quote: "\u{201C}".into(),
489                close_quote: "\u{201D}".into(),
490                open_inner_quote: "\u{2018}".into(),
491                close_inner_quote: "\u{2019}".into(),
492                serial_comma: true,
493                page_range_delimiter: "\u{2013}".into(),
494            },
495            legacy_term_aliases: HashMap::new(),
496            vocab: embedded_en_us_vocab().clone(),
497            evaluator: Arc::new(Mf2MessageEvaluator),
498        }
499    }
500
501    /// Strip leading articles from a string for sorting.
502    ///
503    /// Uses locale-specific articles (e.g., "the", "a", "an" for English;
504    /// "der", "die", "das" for German). Falls back to English articles
505    /// if no locale-specific articles are defined.
506    pub fn strip_sort_articles<'a>(&self, s: &'a str) -> &'a str {
507        let s = s.trim();
508
509        // Default English articles
510        const DEFAULT_ARTICLES: &[&str] = &["the", "a", "an"];
511
512        if self.sort_articles.is_empty() {
513            // Use default English articles
514            for article in DEFAULT_ARTICLES {
515                let prefix = format!("{} ", article);
516                if s.to_lowercase().starts_with(&prefix) {
517                    #[allow(
518                        clippy::string_slice,
519                        reason = "prefix is derived from ASCII article"
520                    )]
521                    return &s[prefix.len()..];
522                }
523            }
524        } else {
525            // Use locale-specific articles
526            for article in &self.sort_articles {
527                let prefix = format!("{} ", article);
528                if s.to_lowercase().starts_with(&prefix) {
529                    #[allow(
530                        clippy::string_slice,
531                        reason = "prefix is derived from a defined article"
532                    )]
533                    return &s[prefix.len()..];
534                }
535            }
536        }
537        s
538    }
539
540    /// Look up display text for a genre canonical key.
541    ///
542    /// Falls back to a readable form of the key if no translation found.
543    pub fn lookup_genre(&self, key: &str) -> String {
544        self.vocab
545            .genre
546            .get(key)
547            .cloned()
548            .unwrap_or_else(|| kebab_to_display(key))
549    }
550
551    /// Look up display text for a medium canonical key.
552    ///
553    /// Falls back to a readable form of the key if no translation found.
554    pub fn lookup_medium(&self, key: &str) -> String {
555        self.vocab
556            .medium
557            .get(key)
558            .cloned()
559            .unwrap_or_else(|| kebab_to_display(key))
560    }
561
562    /// Get default articles for a locale based on language code.
563    fn default_articles_for_locale(locale_id: &str) -> Vec<String> {
564        // Extract language code (first 2 chars)
565        #[allow(clippy::string_slice, reason = "locale_id is expected to be ASCII")]
566        let lang = &locale_id[..2.min(locale_id.len())];
567        match lang {
568            "en" => vec!["the".into(), "a".into(), "an".into()],
569            "de" => vec![
570                "der".into(),
571                "die".into(),
572                "das".into(),
573                "ein".into(),
574                "eine".into(),
575            ],
576            "fr" => vec![
577                "le".into(),
578                "la".into(),
579                "les".into(),
580                "l'".into(),
581                "un".into(),
582                "une".into(),
583            ],
584            "es" => vec![
585                "el".into(),
586                "la".into(),
587                "los".into(),
588                "las".into(),
589                "un".into(),
590                "una".into(),
591            ],
592            "it" => vec![
593                "il".into(),
594                "lo".into(),
595                "la".into(),
596                "i".into(),
597                "gli".into(),
598                "le".into(),
599                "un".into(),
600                "una".into(),
601            ],
602            "pt" => vec![
603                "o".into(),
604                "a".into(),
605                "os".into(),
606                "as".into(),
607                "um".into(),
608                "uma".into(),
609            ],
610            "nl" => vec!["de".into(), "het".into(), "een".into()],
611            _ => vec![], // Fall back to English defaults in strip_sort_articles
612        }
613    }
614
615    /// Get a contributor role term.
616    fn resolve_gendered_value(
617        value: &MaybeGendered<String>,
618        requested_gender: Option<GrammaticalGender>,
619    ) -> Option<&str> {
620        value
621            .resolve_with_fallback(requested_gender)
622            .map(String::as_str)
623    }
624
625    fn resolve_gendered_value_neutral(value: &MaybeGendered<String>) -> Option<&str> {
626        value.resolve_neutral().map(String::as_str)
627    }
628
629    fn resolve_no_date_value<'a>(
630        value: &'a SimpleTerm,
631        form: &TermForm,
632        requested_gender: Option<GrammaticalGender>,
633    ) -> Option<&'a str> {
634        match requested_gender {
635            Some(GrammaticalGender::Common) => match *form {
636                TermForm::Long => value
637                    .long
638                    .resolve_strict(Some(GrammaticalGender::Common))
639                    .map(String::as_str),
640                TermForm::Short => value
641                    .short
642                    .resolve_strict(Some(GrammaticalGender::Common))
643                    .map(String::as_str)
644                    .filter(|value| !value.is_empty())
645                    .or_else(|| {
646                        value
647                            .long
648                            .resolve_strict(Some(GrammaticalGender::Common))
649                            .map(String::as_str)
650                    }),
651                _ => value
652                    .long
653                    .resolve_strict(Some(GrammaticalGender::Common))
654                    .map(String::as_str),
655            },
656            _ => match *form {
657                TermForm::Long => Self::resolve_gendered_value(&value.long, requested_gender),
658                TermForm::Short => {
659                    Self::resolve_gendered_value(&value.short, requested_gender.clone())
660                        .filter(|value| !value.is_empty())
661                        .or_else(|| Self::resolve_gendered_value(&value.long, requested_gender))
662                }
663                _ => Self::resolve_gendered_value(&value.long, requested_gender),
664            },
665        }
666    }
667
668    /// Get a contributor role term.
669    pub fn role_term(
670        &self,
671        role: &ContributorRole,
672        plural: bool,
673        form: &TermForm,
674        requested_gender: Option<GrammaticalGender>,
675    ) -> Option<&str> {
676        let term = self.roles.get(role)?;
677        let simple = if plural { &term.plural } else { &term.singular };
678        let term_text = match *form {
679            TermForm::Long => Self::resolve_gendered_value(&simple.long, requested_gender),
680            TermForm::Short => {
681                Self::resolve_gendered_value(&simple.short, requested_gender.clone())
682                    .filter(|value| !value.is_empty())
683                    .or_else(|| Self::resolve_gendered_value(&simple.long, requested_gender))
684            }
685            TermForm::Verb => Self::resolve_gendered_value(&term.verb.long, None),
686            TermForm::VerbShort => Self::resolve_gendered_value(&term.verb.short, None)
687                .filter(|value| !value.is_empty())
688                .or_else(|| Self::resolve_gendered_value(&term.verb.long, None)),
689            _ => Self::resolve_gendered_value(&simple.long, requested_gender),
690        };
691
692        match term_text {
693            Some(value) if !value.is_empty() => Some(value),
694            _ => None,
695        }
696    }
697
698    /// Resolve a contributor role term using only neutral/common values.
699    pub fn role_term_neutral(
700        &self,
701        role: &ContributorRole,
702        plural: bool,
703        form: &TermForm,
704    ) -> Option<&str> {
705        let term = self.roles.get(role)?;
706        let simple = if plural { &term.plural } else { &term.singular };
707        let term_text = match *form {
708            TermForm::Long => Self::resolve_gendered_value_neutral(&simple.long),
709            TermForm::Short => Self::resolve_gendered_value_neutral(&simple.short)
710                .filter(|value| !value.is_empty())
711                .or_else(|| Self::resolve_gendered_value_neutral(&simple.long)),
712            TermForm::Verb => Self::resolve_gendered_value(&term.verb.long, None),
713            TermForm::VerbShort => Self::resolve_gendered_value(&term.verb.short, None)
714                .filter(|value| !value.is_empty())
715                .or_else(|| Self::resolve_gendered_value(&term.verb.long, None)),
716            _ => Self::resolve_gendered_value_neutral(&simple.long),
717        };
718
719        match term_text {
720            Some(value) if !value.is_empty() => Some(value),
721            _ => None,
722        }
723    }
724
725    /// Resolve a contributor role term, evaluating MF2 messages when configured.
726    pub fn resolved_role_term(
727        &self,
728        role: &ContributorRole,
729        plural: bool,
730        form: &TermForm,
731        requested_gender: Option<GrammaticalGender>,
732    ) -> Option<String> {
733        if let Some(message_id) = Self::role_message_id(role, form)
734            && let Some(resolved) = self.resolve_message_text(
735                message_id,
736                Some(u64::from(plural) + 1),
737                requested_gender.clone(),
738            )
739        {
740            return Some(resolved);
741        }
742
743        self.role_term(role, plural, form, requested_gender)
744            .map(ToOwned::to_owned)
745    }
746
747    /// Resolve a contributor role term using only neutral/common values.
748    pub fn resolved_role_term_neutral(
749        &self,
750        role: &ContributorRole,
751        plural: bool,
752        form: &TermForm,
753    ) -> Option<String> {
754        if let Some(message_id) = Self::role_message_id(role, form)
755            && let Some(resolved) = self.resolve_message_text(
756                message_id,
757                Some(u64::from(plural) + 1),
758                Some(GrammaticalGender::Common),
759            )
760        {
761            return Some(resolved);
762        }
763
764        self.role_term_neutral(role, plural, form)
765            .map(ToOwned::to_owned)
766    }
767
768    /// Get a locator term.
769    pub fn locator_term(
770        &self,
771        locator: &LocatorType,
772        plural: bool,
773        form: &TermForm,
774        requested_gender: Option<GrammaticalGender>,
775    ) -> Option<&str> {
776        let term = self.locators.get(locator)?;
777        let form_term = match *form {
778            TermForm::Long => &term.long,
779            TermForm::Short => &term.short,
780            TermForm::Symbol => &term.symbol,
781            _ => &term.short, // Fallback
782        };
783
784        if let Some(ft) = form_term {
785            let value = if plural { &ft.plural } else { &ft.singular };
786            Self::resolve_gendered_value(value, requested_gender)
787        } else {
788            None
789        }
790    }
791
792    /// Resolve a locator term, evaluating MF2 messages when configured.
793    pub fn resolved_locator_term(
794        &self,
795        locator: &LocatorType,
796        plural: bool,
797        form: &TermForm,
798        requested_gender: Option<GrammaticalGender>,
799    ) -> Option<String> {
800        if let Some(message_id) = Self::locator_message_id(locator, form)
801            && let Some(resolved) = self.resolve_message_text(
802                message_id,
803                Some(u64::from(plural) + 1),
804                requested_gender.clone(),
805            )
806        {
807            return Some(resolved);
808        }
809
810        self.locator_term(locator, plural, form, requested_gender.clone())
811            .map(ToOwned::to_owned)
812            .or_else(|| {
813                if let LocatorType::Custom(key) = locator {
814                    self.locator_term_any_form(locator, plural, requested_gender)
815                        .map(ToOwned::to_owned)
816                        .or_else(|| Some(key.clone()))
817                } else {
818                    None
819                }
820            })
821    }
822
823    fn locator_term_any_form(
824        &self,
825        locator: &LocatorType,
826        plural: bool,
827        requested_gender: Option<GrammaticalGender>,
828    ) -> Option<&str> {
829        let term = self.locators.get(locator)?;
830        [&term.long, &term.short, &term.symbol]
831            .into_iter()
832            .flatten()
833            .next()
834            .map(|forms| {
835                if plural {
836                    Self::resolve_gendered_value(&forms.plural, requested_gender).unwrap_or("")
837                } else {
838                    Self::resolve_gendered_value(&forms.singular, requested_gender).unwrap_or("")
839                }
840            })
841            .filter(|value| !value.is_empty())
842    }
843
844    /// Resolve a general term to a borrowed string.
845    pub fn general_term(
846        &self,
847        term: &GeneralTerm,
848        form: &TermForm,
849        requested_gender: Option<GrammaticalGender>,
850    ) -> Option<&str> {
851        // Legacy borrowed lookup path: prefer plain v2 messages first, then
852        // alias-backed messages, and finally the v1 term tables.
853        let candidate_id = format!("term.{}", Self::general_term_to_message_id(term));
854        if let Some(msg) = self.messages.get(&candidate_id) {
855            // Only use plain messages here (no ICU variable syntax)
856            if !msg.contains('{') {
857                return Some(msg.as_str());
858            }
859        }
860        // Check legacy_term_aliases
861        let legacy_key = Self::general_term_to_legacy_key(term);
862        if let Some(msg_id) = self.legacy_term_aliases.get(legacy_key)
863            && let Some(msg) = self.messages.get(msg_id)
864            && !msg.contains('{')
865        {
866            return Some(msg.as_str());
867        }
868
869        // First try the flattened map
870        if *term != GeneralTerm::NoDate
871            && let Some(simple) = self.terms.general.get(term)
872        {
873            return match *form {
874                TermForm::Long => Self::resolve_gendered_value(&simple.long, requested_gender),
875                TermForm::Short => {
876                    Self::resolve_gendered_value(&simple.short, requested_gender.clone())
877                        .filter(|value| !value.is_empty())
878                        .or_else(|| Self::resolve_gendered_value(&simple.long, requested_gender))
879                }
880
881                _ => Self::resolve_gendered_value(&simple.long, requested_gender),
882            };
883        }
884
885        // Fallback to specific fields for common terms
886        match term {
887            GeneralTerm::And => self.terms.and.as_deref(),
888            GeneralTerm::EtAl => self.terms.et_al.as_deref(),
889            GeneralTerm::AndOthers => self.terms.and_others.as_deref(),
890            GeneralTerm::Accessed => self.terms.accessed.as_deref(),
891            GeneralTerm::Ibid => self.terms.ibid.as_deref(),
892            GeneralTerm::In => self.terms.in_.as_deref(),
893            GeneralTerm::NoDate => self
894                .terms
895                .general
896                .get(term)
897                .and_then(|value| Self::resolve_no_date_value(value, form, requested_gender))
898                .or(self.terms.no_date.as_deref()),
899            GeneralTerm::Retrieved => self.terms.retrieved.as_deref(),
900            GeneralTerm::At => self.terms.at.as_deref(),
901            GeneralTerm::By => self.terms.by.as_deref(),
902            GeneralTerm::From => self.terms.from.as_deref(),
903            GeneralTerm::Of => self
904                .terms
905                .general
906                .get(term)
907                .and_then(|value| Self::resolve_gendered_value(&value.long, requested_gender)),
908            GeneralTerm::To => self
909                .terms
910                .general
911                .get(term)
912                .and_then(|value| Self::resolve_gendered_value(&value.long, requested_gender)),
913            GeneralTerm::Anonymous => {
914                Self::resolve_gendered_value(&self.terms.anonymous.long, requested_gender)
915            }
916            GeneralTerm::Circa => {
917                Self::resolve_gendered_value(&self.terms.circa.long, requested_gender)
918            }
919            // Fallback to locators for shared terms
920            GeneralTerm::Volume => {
921                self.locator_term(&LocatorType::Volume, false, form, requested_gender)
922            }
923            GeneralTerm::Issue => {
924                self.locator_term(&LocatorType::Issue, false, form, requested_gender)
925            }
926            GeneralTerm::Page => {
927                self.locator_term(&LocatorType::Page, false, form, requested_gender)
928            }
929            GeneralTerm::Chapter => {
930                self.locator_term(&LocatorType::Chapter, false, form, requested_gender)
931            }
932            GeneralTerm::Section => {
933                self.locator_term(&LocatorType::Section, false, form, requested_gender)
934            }
935            GeneralTerm::Here => self
936                .terms
937                .general
938                .get(term)
939                .and_then(|value| Self::resolve_gendered_value(&value.long, requested_gender)),
940            GeneralTerm::Deposited => self
941                .terms
942                .general
943                .get(term)
944                .and_then(|value| Self::resolve_gendered_value(&value.long, requested_gender)),
945            _ => None,
946        }
947    }
948
949    /// Resolve a general term, evaluating MF2 messages when configured.
950    pub fn resolved_general_term(
951        &self,
952        term: &GeneralTerm,
953        form: &TermForm,
954        requested_gender: Option<GrammaticalGender>,
955    ) -> Option<String> {
956        if let Some(message_id) = Self::general_message_id(term, form)
957            && let Some(resolved) =
958                self.resolve_message_text(message_id, None, requested_gender.clone())
959        {
960            return Some(resolved);
961        }
962
963        self.general_term(term, form, requested_gender)
964            .map(ToOwned::to_owned)
965    }
966
967    /// Resolve an archive hierarchy label, using MF2 messages.
968    /// Returns singular form (count=1) by default.
969    pub fn resolved_archive_term(&self, field: ArchiveHierarchyField) -> Option<String> {
970        self.resolve_message_text(field.message_id(), Some(1), None)
971    }
972
973    /// Get the "and" term based on style preference.
974    pub fn and_term(&self, use_symbol: bool) -> &str {
975        if use_symbol {
976            self.terms.and_symbol.as_deref().unwrap_or("&")
977        } else {
978            self.terms.and.as_deref().unwrap_or("and")
979        }
980    }
981
982    /// Get the "et al." term.
983    pub fn et_al(&self) -> &str {
984        self.terms.et_al.as_deref().unwrap_or("et al.")
985    }
986
987    /// Get a month name.
988    pub fn month_name(&self, month: u8, short: bool) -> &str {
989        let idx = (month.saturating_sub(1)) as usize;
990        if short {
991            self.dates
992                .months
993                .short
994                .get(idx)
995                .map(|s| s.as_str())
996                .unwrap_or("")
997        } else {
998            self.dates
999                .months
1000                .long
1001                .get(idx)
1002                .map(|s| s.as_str())
1003                .unwrap_or("")
1004        }
1005    }
1006
1007    /// Map a GeneralTerm to its canonical message ID suffix (e.g., GeneralTerm::EtAl → "et-al").
1008    fn general_term_to_message_id(term: &GeneralTerm) -> &str {
1009        match term {
1010            GeneralTerm::And => "and",
1011            GeneralTerm::EtAl => "et-al",
1012            GeneralTerm::AndOthers => "and-others",
1013            GeneralTerm::Accessed => "accessed",
1014            GeneralTerm::Retrieved => "retrieved",
1015            GeneralTerm::NoDate => "no-date",
1016            GeneralTerm::Ibid => "ibid",
1017            GeneralTerm::In => "in",
1018            GeneralTerm::At => "at",
1019            GeneralTerm::By => "by",
1020            GeneralTerm::From => "from",
1021            GeneralTerm::Of => "of",
1022            GeneralTerm::To => "to",
1023            GeneralTerm::Anonymous => "anonymous",
1024            GeneralTerm::Circa => "circa",
1025            GeneralTerm::Forthcoming => "forthcoming",
1026            GeneralTerm::Online => "online",
1027            GeneralTerm::AvailableAt => "available-at",
1028            GeneralTerm::ReviewOf => "review-of",
1029            GeneralTerm::Here => "here",
1030            GeneralTerm::Deposited => "deposited",
1031            GeneralTerm::Patent => "patent",
1032            GeneralTerm::Volume => "volume",
1033            GeneralTerm::Issue => "issue",
1034            GeneralTerm::Page => "page",
1035            GeneralTerm::Chapter => "chapter",
1036            GeneralTerm::Edition => "edition",
1037            GeneralTerm::Section => "section",
1038            GeneralTerm::OriginalWorkPublished => "original-work-published",
1039            GeneralTerm::PersonalCommunication => "personal-communication",
1040            GeneralTerm::Unknown(s) => s.as_str(),
1041        }
1042    }
1043
1044    /// Map a GeneralTerm to its legacy CSL key string for alias lookup.
1045    fn general_term_to_legacy_key(term: &GeneralTerm) -> &str {
1046        match term {
1047            GeneralTerm::EtAl => "et_al",
1048            GeneralTerm::NoDate => "no_date",
1049            _ => Self::general_term_to_message_id(term),
1050        }
1051    }
1052
1053    fn role_message_id(role: &ContributorRole, form: &TermForm) -> Option<&'static str> {
1054        let prefix = match role {
1055            ContributorRole::Editor => "role.editor",
1056            ContributorRole::Translator => "role.translator",
1057            ContributorRole::Guest => "role.guest",
1058            _ => return None,
1059        };
1060
1061        match *form {
1062            TermForm::Long => Some(match prefix {
1063                "role.editor" => "role.editor.label-long",
1064                "role.translator" => "role.translator.label-long",
1065                "role.guest" => "role.guest.label-long",
1066                _ => return None,
1067            }),
1068            TermForm::Short => Some(match prefix {
1069                "role.editor" => "role.editor.label",
1070                "role.translator" => "role.translator.label",
1071                "role.guest" => "role.guest.label",
1072                _ => return None,
1073            }),
1074            TermForm::Verb | TermForm::VerbShort => Some(match prefix {
1075                "role.editor" => "role.editor.verb",
1076                "role.translator" => "role.translator.verb",
1077                "role.guest" => "role.guest.verb",
1078                _ => return None,
1079            }),
1080            _ => None,
1081        }
1082    }
1083
1084    fn locator_message_id(locator: &LocatorType, form: &TermForm) -> Option<&'static str> {
1085        let prefix = match locator {
1086            LocatorType::Page => "term.page-label",
1087            LocatorType::Chapter => "term.chapter-label",
1088            LocatorType::Volume => "term.volume-label",
1089            LocatorType::Section => "term.section-label",
1090            LocatorType::Figure => "term.figure-label",
1091            LocatorType::Note => "term.note-label",
1092            _ => return None,
1093        };
1094
1095        match *form {
1096            TermForm::Long => Some(match prefix {
1097                "term.page-label" => "term.page-label-long",
1098                "term.chapter-label" => "term.chapter-label-long",
1099                "term.volume-label" => "term.volume-label-long",
1100                "term.section-label" => "term.section-label-long",
1101                "term.figure-label" => "term.figure-label-long",
1102                "term.note-label" => "term.note-label-long",
1103                _ => return None,
1104            }),
1105            TermForm::Short => Some(prefix),
1106            _ => None,
1107        }
1108    }
1109
1110    fn general_message_id(term: &GeneralTerm, form: &TermForm) -> Option<&'static str> {
1111        match (term, form) {
1112            (GeneralTerm::And, _) => Some("term.and"),
1113            (GeneralTerm::EtAl, _) => Some("term.et-al"),
1114            (GeneralTerm::AndOthers, _) => Some("term.and-others"),
1115            (GeneralTerm::Accessed, _) => Some("term.accessed"),
1116            (GeneralTerm::Retrieved, _) => Some("term.retrieved"),
1117            (GeneralTerm::NoDate, TermForm::Long) => Some("term.no-date-long"),
1118            (GeneralTerm::NoDate, _) => Some("term.no-date"),
1119            (GeneralTerm::Forthcoming, _) => Some("term.forthcoming"),
1120            (GeneralTerm::Circa, TermForm::Long) => Some("term.circa-long"),
1121            (GeneralTerm::Circa, _) => Some("term.circa"),
1122            _ => None,
1123        }
1124    }
1125
1126    fn gender_selector_key(gender: &GrammaticalGender) -> &str {
1127        match gender {
1128            GrammaticalGender::Masculine => "masculine",
1129            GrammaticalGender::Feminine => "feminine",
1130            GrammaticalGender::Neuter => "neuter",
1131            GrammaticalGender::Common => "common",
1132            GrammaticalGender::Unknown(s) => s.as_str(),
1133        }
1134    }
1135
1136    fn resolve_message_text(
1137        &self,
1138        message_id: &str,
1139        count: Option<u64>,
1140        gender: Option<GrammaticalGender>,
1141    ) -> Option<String> {
1142        let message = self.messages.get(message_id)?;
1143
1144        // Build MessageArgs for the evaluator
1145        let args = MessageArgs {
1146            count,
1147            gender: gender.as_ref().map(Self::gender_selector_key),
1148            ..MessageArgs::default()
1149        };
1150
1151        // Store variables as owned Strings and then reference them
1152        // (This is a limitation of the current design; could be improved with owned args)
1153        // For now, we'll build a simple approach: just return the message if it's static
1154        if !message.contains('{') {
1155            return Some(message.clone());
1156        }
1157
1158        // If no parameterized syntax, return None (fallback to legacy terms)
1159        if self.evaluation.message_syntax == MessageSyntax::Static {
1160            return None;
1161        }
1162
1163        // Create a temporary variables map for the evaluator
1164        // This is a simplification; the evaluator trait should ideally accept the args directly
1165        // For now, we'll convert and handle in the message evaluator
1166        self.evaluator.evaluate(message, &args)
1167    }
1168
1169    /// Resolve a `pattern.date-*` message with locale-specific year/month/day
1170    /// components.
1171    ///
1172    /// Returns `Some(rendered)` only when the locale carries an MF2 message
1173    /// at `message_id` and the evaluator produces output. Callers fall back
1174    /// to the engine's hardcoded English assembly on `None`.
1175    ///
1176    /// A component is forwarded to the evaluator only when non-empty; an
1177    /// authored pattern that references `{$day}` therefore yields `None` if
1178    /// the input date carries no day, letting the caller pick a shorter form.
1179    ///
1180    /// The day argument is taken as `Option<u32>` rather than a pre-formatted
1181    /// string so the digit-to-string allocation is deferred until after the
1182    /// message lookup succeeds — the common case for legacy locales (`en-US`,
1183    /// every v1 file) is the lookup miss, which now incurs zero allocation.
1184    pub fn resolve_date_pattern(
1185        &self,
1186        message_id: &str,
1187        year: Option<&str>,
1188        month: Option<&str>,
1189        day: Option<u32>,
1190    ) -> Option<String> {
1191        let message = self.messages.get(message_id)?;
1192        if self.evaluation.message_syntax == MessageSyntax::Static {
1193            return None;
1194        }
1195
1196        let day_str = day.map(|d| d.to_string());
1197        let args = MessageArgs {
1198            year: year.filter(|s| !s.is_empty()),
1199            month: month.filter(|s| !s.is_empty()),
1200            day: day_str.as_deref(),
1201            ..MessageArgs::default()
1202        };
1203        self.evaluator.evaluate(message, &args)
1204    }
1205}
1206
1207impl Locale {
1208    /// Load a locale from a YAML string.
1209    ///
1210    /// # Errors
1211    ///
1212    /// Returns an error when the YAML cannot be parsed into a locale.
1213    pub fn from_yaml_str(yaml: &str) -> Result<Self, String> {
1214        let raw: raw::RawLocale = serde_yaml::from_str(yaml)
1215            .map_err(|e| format!("Failed to parse locale YAML: {}", e))?;
1216
1217        Ok(Self::from_raw(raw))
1218    }
1219
1220    /// Load a locale by ID (e.g., "en-US", "de-DE") from a locales directory.
1221    /// Falls back to en-US if the locale file is not found.
1222    pub fn load(locale_id: &str, locales_dir: &std::path::Path) -> Self {
1223        let extensions = ["yaml", "yml", "json", "cbor"];
1224
1225        for ext in &extensions {
1226            let file_name = format!("{}.{}", locale_id, ext);
1227            let file_path = locales_dir.join(&file_name);
1228
1229            if file_path.exists() {
1230                match Self::from_file(&file_path) {
1231                    Ok(locale) => return locale,
1232                    Err(e) => {
1233                        eprintln!(
1234                            "Warning: Failed to load locale {}.{}: {}",
1235                            locale_id, ext, e
1236                        );
1237                    }
1238                }
1239            }
1240        }
1241
1242        // Try fallback to base locale (e.g., "de" from "de-DE")
1243        if locale_id.contains('-') {
1244            let base = locale_id.split('-').next().unwrap_or("en");
1245            // Try all files that start with base
1246            if let Ok(entries) = std::fs::read_dir(locales_dir) {
1247                for entry in entries.flatten() {
1248                    let name = entry.file_name();
1249                    let name_str = name.to_string_lossy();
1250                    if (name_str.starts_with(base)
1251                        && extensions.iter().any(|ext| name_str.ends_with(ext)))
1252                        && let Ok(locale) = Self::from_file(&entry.path())
1253                    {
1254                        return locale;
1255                    }
1256                }
1257            }
1258        }
1259
1260        // Default to hardcoded en-US
1261        Self::en_us()
1262    }
1263
1264    /// Load locale from a file path directly (detects format).
1265    ///
1266    /// # Errors
1267    ///
1268    /// Returns an error when the file cannot be read or its contents cannot be
1269    /// parsed as a supported locale format.
1270    pub fn from_file(path: &std::path::Path) -> Result<Self, String> {
1271        let bytes =
1272            std::fs::read(path).map_err(|e| format!("Failed to read locale file: {}", e))?;
1273        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("yaml");
1274
1275        match ext {
1276            "cbor" => ciborium::de::from_reader::<raw::RawLocale, _>(std::io::Cursor::new(&bytes))
1277                .map(Self::from_raw)
1278                .map_err(|e| format!("Failed to parse CBOR locale: {}", e)),
1279            "json" => serde_json::from_slice::<raw::RawLocale>(&bytes)
1280                .map(Self::from_raw)
1281                .map_err(|e| format!("Failed to parse JSON locale: {}", e)),
1282            _ => {
1283                let content = String::from_utf8_lossy(&bytes);
1284                Self::from_yaml_str(&content)
1285            }
1286        }
1287    }
1288
1289    /// Convert a RawLocale to a Locale.
1290    #[allow(
1291        clippy::too_many_lines,
1292        reason = "Complex parsing of raw locale data with multiple term types"
1293    )]
1294    fn from_raw(raw: raw::RawLocale) -> Self {
1295        // Determine punctuation-in-quote from locale ID
1296        // en-US uses American style (inside), en-GB and others use outside
1297        let punctuation_in_quote = raw.locale.starts_with("en-US")
1298            || (raw.locale.starts_with("en") && !raw.locale.starts_with("en-GB"));
1299
1300        // Start from en-US defaults so partially specified locale files still
1301        // have complete term/locator coverage (e.g., page/section labels).
1302        let mut locale = Locale::en_us();
1303        locale.locale = raw.locale.clone();
1304        locale.dates = DateTerms {
1305            months: MonthNames {
1306                long: raw.dates.months.long,
1307                short: raw.dates.months.short,
1308            },
1309            seasons: raw.dates.seasons,
1310            uncertainty_term: raw.dates.uncertainty_term,
1311            open_ended_term: raw.dates.open_ended_term,
1312            am: raw.dates.am,
1313            pm: raw.dates.pm,
1314            timezone_utc: raw.dates.timezone_utc,
1315            before_era: raw.dates.before_era,
1316            ad: raw.dates.ad,
1317            bc: raw.dates.bc,
1318            bce: raw.dates.bce,
1319            ce: raw.dates.ce,
1320        };
1321        locale.punctuation_in_quote = punctuation_in_quote;
1322        // Set locale-specific articles based on language
1323        locale.sort_articles = Self::default_articles_for_locale(&raw.locale);
1324
1325        // v2 schema fields — copied verbatim (no structural transformation needed at this layer)
1326        locale.locale_schema_version = raw.locale_schema_version;
1327        locale.evaluation = raw.evaluation.unwrap_or_default();
1328        locale.messages = raw.messages;
1329        locale.date_formats = raw.date_formats;
1330        locale.legacy_term_aliases = raw.legacy_term_aliases;
1331
1332        // Merge vocab overrides into the embedded en-US defaults.
1333        if let Some(raw_vocab) = raw.vocab {
1334            locale.vocab.genre.extend(raw_vocab.genre);
1335            locale.vocab.medium.extend(raw_vocab.medium);
1336        }
1337
1338        // Merge grammar_options: use raw value if present, otherwise derive from locale ID
1339        if let Some(go) = raw.grammar_options {
1340            locale.grammar_options = go;
1341        } else {
1342            // Derive punctuation_in_quote from locale ID (preserving existing behaviour)
1343            locale.grammar_options.punctuation_in_quote = locale.punctuation_in_quote;
1344        }
1345        // For v2 locales that explicitly declare grammar_options, the grammar_options
1346        // field is the authoritative source. Sync back to the legacy punctuation_in_quote
1347        // field so all existing call sites in citum-engine get the correct value.
1348        locale.punctuation_in_quote = locale.grammar_options.punctuation_in_quote;
1349
1350        // Merge number_formats if provided
1351        if let Some(nf) = raw.number_formats {
1352            locale.number_formats = nf;
1353        }
1354
1355        let explicit_locator_keys: std::collections::HashSet<LocatorType> = raw
1356            .locators
1357            .keys()
1358            .filter_map(|key| Self::parse_builtin_locator_type(key))
1359            .collect();
1360
1361        for (key, value) in &raw.locators {
1362            if let Some(locator_type) = Self::parse_locator_type(key) {
1363                let locator_term = LocatorTerm {
1364                    long: Self::extract_singular_plural(value.long.as_ref().as_ref()),
1365                    short: Self::extract_singular_plural(value.short.as_ref().as_ref()),
1366                    symbol: Self::extract_singular_plural(value.symbol.as_ref().as_ref()),
1367                    gender: value.gender.clone(),
1368                };
1369                locale.locators.insert(locator_type, locator_term);
1370            }
1371        }
1372
1373        // Map raw terms to structured general terms.
1374        for (key, value) in &raw.terms {
1375            if let Some(locator_type) = Self::parse_builtin_locator_type(key)
1376                && !explicit_locator_keys.contains(&locator_type)
1377                && let Some(forms) = Self::get_forms(value)
1378            {
1379                let locator_term = LocatorTerm {
1380                    long: Self::extract_singular_plural(forms.get("long").as_ref()),
1381                    short: Self::extract_singular_plural(forms.get("short").as_ref()),
1382                    symbol: Self::extract_singular_plural(forms.get("symbol").as_ref()),
1383                    gender: None,
1384                };
1385                locale.locators.insert(locator_type, locator_term);
1386                continue;
1387            }
1388
1389            match key.as_str() {
1390                "and" => {
1391                    if let Some(forms) = Self::get_forms(value) {
1392                        if let Some(v) = forms.get("long").and_then(|v| v.as_string()) {
1393                            locale.terms.and = Some(v.to_string());
1394                        }
1395                        if let Some(v) = forms.get("symbol").and_then(|v| v.as_string()) {
1396                            locale.terms.and_symbol = Some(v.to_string());
1397                        }
1398                    }
1399                }
1400                "et_al" => {
1401                    if let Some(forms) = Self::get_forms(value)
1402                        && let Some(v) = forms.get("long").and_then(|v| v.as_string())
1403                    {
1404                        locale.terms.et_al = Some(v.to_string());
1405                    }
1406                }
1407                "and others" | "and_others" => {
1408                    if let Some(forms) = Self::get_forms(value)
1409                        && let Some(v) = forms.get("long").and_then(|v| v.as_string())
1410                    {
1411                        locale.terms.and_others = Some(v.to_string());
1412                    }
1413                }
1414                "accessed" => {
1415                    if let Some(forms) = Self::get_forms(value)
1416                        && let Some(v) = forms.get("long").and_then(|v| v.as_string())
1417                    {
1418                        locale.terms.accessed = Some(v.to_string());
1419                    }
1420                }
1421                "ibid" => {
1422                    if let Some(forms) = Self::get_forms(value)
1423                        && let Some(v) = forms.get("long").and_then(|v| v.as_string())
1424                    {
1425                        locale.terms.ibid = Some(v.to_string());
1426                    }
1427                }
1428                "no date" => {
1429                    let simple = Self::extract_simple_term_from_raw(value);
1430                    let short_fallback = simple.short.as_default_str().to_string();
1431                    locale.terms.general.insert(GeneralTerm::NoDate, simple);
1432                    locale.terms.no_date.get_or_insert(short_fallback);
1433                }
1434                "no_date" => {
1435                    let simple = Self::extract_simple_term_from_raw(value);
1436                    locale.terms.no_date = Some(simple.short.as_str().to_string());
1437                    locale
1438                        .terms
1439                        .general
1440                        .entry(GeneralTerm::NoDate)
1441                        .or_insert(simple);
1442                }
1443                _ => {
1444                    // Try to parse as GeneralTerm
1445                    if let Some(general_term) = Self::parse_general_term(key) {
1446                        let simple = Self::extract_simple_term_from_raw(value);
1447                        locale.terms.general.insert(general_term, simple);
1448                    }
1449                }
1450            }
1451        }
1452
1453        // Map raw roles to structured roles (simplified for now)
1454        for (key, role_term) in &raw.roles {
1455            if let Some(role) = Self::parse_role_name(key) {
1456                let contributor_term = ContributorTerm {
1457                    singular: Self::extract_simple_term(&role_term.long, &role_term.short, false),
1458                    plural: Self::extract_simple_term(&role_term.long, &role_term.short, true),
1459                    verb: Self::extract_verb_term(&role_term.verb, &role_term.verb_short),
1460                };
1461                locale.roles.insert(role, contributor_term);
1462            }
1463        }
1464
1465        // Set the message evaluator based on evaluation.message_syntax
1466        locale.evaluator = match locale.evaluation.message_syntax {
1467            MessageSyntax::Mf2 => Arc::new(Mf2MessageEvaluator),
1468            MessageSyntax::Static => Arc::new(Mf2MessageEvaluator),
1469        };
1470
1471        locale
1472    }
1473
1474    fn get_forms(value: &raw::RawTermValue) -> Option<&HashMap<String, raw::RawTermValue>> {
1475        match value {
1476            raw::RawTermValue::Forms(forms) => Some(forms),
1477            _ => None,
1478        }
1479    }
1480
1481    fn parse_locator_type(name: &str) -> Option<LocatorType> {
1482        LocatorType::from_key(name).ok()
1483    }
1484
1485    fn parse_builtin_locator_type(name: &str) -> Option<LocatorType> {
1486        match Self::parse_locator_type(name)? {
1487            LocatorType::Custom(_) => None,
1488            locator => Some(locator),
1489        }
1490    }
1491
1492    fn parse_role_name(name: &str) -> Option<ContributorRole> {
1493        match name {
1494            "author" => Some(ContributorRole::Author),
1495            "chair" => Some(ContributorRole::Chair),
1496            "editor" => Some(ContributorRole::Editor),
1497            "translator" => Some(ContributorRole::Translator),
1498            "director" => Some(ContributorRole::Director),
1499            "compiler" => Some(ContributorRole::Composer), // Close mapping
1500            "illustrator" => Some(ContributorRole::Illustrator),
1501            "collection-editor" => Some(ContributorRole::CollectionEditor),
1502            "container-author" => Some(ContributorRole::ContainerAuthor),
1503            "editorial-director" => Some(ContributorRole::EditorialDirector),
1504            "textual-editor" | "textual_editor" => Some(ContributorRole::TextualEditor),
1505            "interviewer" => Some(ContributorRole::Interviewer),
1506            "original-author" => Some(ContributorRole::OriginalAuthor),
1507            "recipient" => Some(ContributorRole::Recipient),
1508            "reviewed-author" => Some(ContributorRole::ReviewedAuthor),
1509            "composer" => Some(ContributorRole::Composer),
1510            _ => None,
1511        }
1512    }
1513
1514    fn extract_singular_plural(value: Option<&&raw::RawTermValue>) -> Option<SingularPlural> {
1515        match value {
1516            Some(raw::RawTermValue::SingularPlural { singular, plural }) => Some(SingularPlural {
1517                singular: Self::from_raw_gendered_string(singular),
1518                plural: Self::from_raw_gendered_string(plural),
1519            }),
1520            Some(raw::RawTermValue::Simple(s)) => Some(SingularPlural {
1521                singular: MaybeGendered::Plain(s.clone()),
1522                plural: MaybeGendered::Plain(s.clone()), // Fallback if only one form provided
1523            }),
1524            Some(raw::RawTermValue::Gendered {
1525                masculine,
1526                feminine,
1527                neuter,
1528                common,
1529            }) => Some(SingularPlural {
1530                singular: MaybeGendered::Gendered {
1531                    masculine: masculine.clone(),
1532                    feminine: feminine.clone(),
1533                    neuter: neuter.clone(),
1534                    common: common.clone(),
1535                },
1536                plural: MaybeGendered::Gendered {
1537                    masculine: masculine.clone(),
1538                    feminine: feminine.clone(),
1539                    neuter: neuter.clone(),
1540                    common: common.clone(),
1541                },
1542            }),
1543            Some(raw::RawTermValue::Forms(forms)) => {
1544                let singular = forms
1545                    .get("singular")
1546                    .map(Self::extract_maybe_gendered_string);
1547                let plural = forms.get("plural").map(Self::extract_maybe_gendered_string);
1548
1549                singular.map(|s| SingularPlural {
1550                    plural: plural.unwrap_or_else(|| s.clone()),
1551                    singular: s,
1552                })
1553            }
1554            _ => None,
1555        }
1556    }
1557
1558    fn extract_simple_term(
1559        long: &Option<raw::RawTermValue>,
1560        short: &Option<raw::RawTermValue>,
1561        plural: bool,
1562    ) -> SimpleTerm {
1563        let long_str = long
1564            .as_ref()
1565            .map(|v| Self::extract_simple_gendered_term(v, plural))
1566            .unwrap_or_default();
1567
1568        let short_str = short
1569            .as_ref()
1570            .map(|v| Self::extract_simple_gendered_term(v, plural))
1571            .unwrap_or_default();
1572
1573        SimpleTerm {
1574            long: long_str,
1575            short: short_str,
1576        }
1577    }
1578
1579    fn extract_verb_term(
1580        verb: &Option<raw::RawTermValue>,
1581        verb_short: &Option<raw::RawTermValue>,
1582    ) -> SimpleTerm {
1583        let long_str = verb
1584            .as_ref()
1585            .and_then(|v| v.as_string())
1586            .unwrap_or("")
1587            .into();
1588
1589        let short_str = verb_short
1590            .as_ref()
1591            .and_then(|v| v.as_string())
1592            .unwrap_or("")
1593            .into();
1594
1595        SimpleTerm {
1596            long: long_str,
1597            short: short_str,
1598        }
1599    }
1600
1601    /// Normalize a locale term key to canonical kebab-case.
1602    ///
1603    /// Locale YAML files and style templates may use underscores or spaces
1604    /// interchangeably with hyphens (e.g. `no_date`, `no date`, `no-date`).
1605    /// This helper converts all three forms to the single canonical
1606    /// kebab-case key so `parse_general_term` only needs to match one pattern
1607    /// per term.
1608    fn normalize_term_key(s: &str) -> String {
1609        s.replace(['_', ' '], "-")
1610    }
1611
1612    /// Parse a locale term key into a structured general-term identifier.
1613    pub fn parse_general_term(name: &str) -> Option<GeneralTerm> {
1614        match Self::normalize_term_key(name).as_str() {
1615            "in" => Some(GeneralTerm::In),
1616            "accessed" => Some(GeneralTerm::Accessed),
1617            "retrieved" => Some(GeneralTerm::Retrieved),
1618            "at" => Some(GeneralTerm::At),
1619            "from" => Some(GeneralTerm::From),
1620            "of" => Some(GeneralTerm::Of),
1621            "to" => Some(GeneralTerm::To),
1622            "by" => Some(GeneralTerm::By),
1623            "no-date" => Some(GeneralTerm::NoDate),
1624            "anonymous" => Some(GeneralTerm::Anonymous),
1625            "circa" => Some(GeneralTerm::Circa),
1626            "available-at" => Some(GeneralTerm::AvailableAt),
1627            "ibid" => Some(GeneralTerm::Ibid),
1628            "and" => Some(GeneralTerm::And),
1629            "et-al" => Some(GeneralTerm::EtAl),
1630            "and-others" => Some(GeneralTerm::AndOthers),
1631            "forthcoming" => Some(GeneralTerm::Forthcoming),
1632            "online" => Some(GeneralTerm::Online),
1633            "here" => Some(GeneralTerm::Here),
1634            "deposited" => Some(GeneralTerm::Deposited),
1635            "review-of" => Some(GeneralTerm::ReviewOf),
1636            "original-work-published" => Some(GeneralTerm::OriginalWorkPublished),
1637            "personal-communication" => Some(GeneralTerm::PersonalCommunication),
1638            "patent" => Some(GeneralTerm::Patent),
1639            "volume" => Some(GeneralTerm::Volume),
1640            "issue" => Some(GeneralTerm::Issue),
1641            "page" => Some(GeneralTerm::Page),
1642            "chapter" => Some(GeneralTerm::Chapter),
1643            "edition" => Some(GeneralTerm::Edition),
1644            "section" => Some(GeneralTerm::Section),
1645            _ => None,
1646        }
1647    }
1648
1649    fn extract_simple_term_from_raw(value: &raw::RawTermValue) -> SimpleTerm {
1650        match value {
1651            raw::RawTermValue::Simple(s) => SimpleTerm {
1652                long: s.clone().into(),
1653                short: s.clone().into(),
1654            },
1655            raw::RawTermValue::Gendered {
1656                masculine,
1657                feminine,
1658                neuter,
1659                common,
1660            } => SimpleTerm {
1661                long: MaybeGendered::Gendered {
1662                    masculine: masculine.clone(),
1663                    feminine: feminine.clone(),
1664                    neuter: neuter.clone(),
1665                    common: common.clone(),
1666                },
1667                short: MaybeGendered::Gendered {
1668                    masculine: masculine.clone(),
1669                    feminine: feminine.clone(),
1670                    neuter: neuter.clone(),
1671                    common: common.clone(),
1672                },
1673            },
1674            raw::RawTermValue::Forms(forms) => {
1675                let long = forms
1676                    .get("long")
1677                    .map(Self::extract_maybe_gendered_string)
1678                    .unwrap_or_default();
1679                let short = forms
1680                    .get("short")
1681                    .map(Self::extract_maybe_gendered_string)
1682                    .unwrap_or_else(|| long.clone());
1683                SimpleTerm { long, short }
1684            }
1685            raw::RawTermValue::SingularPlural { singular, .. } => SimpleTerm {
1686                long: Self::from_raw_gendered_string(singular),
1687                short: Self::from_raw_gendered_string(singular),
1688            },
1689        }
1690    }
1691
1692    fn from_raw_gendered_string(value: &raw::RawGenderedString) -> MaybeGendered<String> {
1693        match value {
1694            raw::RawGenderedString::Simple(value) => MaybeGendered::Plain(value.clone()),
1695            raw::RawGenderedString::Gendered {
1696                masculine,
1697                feminine,
1698                neuter,
1699                common,
1700            } => MaybeGendered::Gendered {
1701                masculine: masculine.clone(),
1702                feminine: feminine.clone(),
1703                neuter: neuter.clone(),
1704                common: common.clone(),
1705            },
1706        }
1707    }
1708
1709    fn extract_maybe_gendered_string(value: &raw::RawTermValue) -> MaybeGendered<String> {
1710        match value {
1711            raw::RawTermValue::Simple(value) => MaybeGendered::Plain(value.clone()),
1712            raw::RawTermValue::Gendered {
1713                masculine,
1714                feminine,
1715                neuter,
1716                common,
1717            } => MaybeGendered::Gendered {
1718                masculine: masculine.clone(),
1719                feminine: feminine.clone(),
1720                neuter: neuter.clone(),
1721                common: common.clone(),
1722            },
1723            raw::RawTermValue::SingularPlural { singular, .. } => {
1724                Self::from_raw_gendered_string(singular)
1725            }
1726            raw::RawTermValue::Forms(forms) => forms
1727                .get("long")
1728                .or_else(|| forms.get("singular"))
1729                .map(Self::extract_maybe_gendered_string)
1730                .unwrap_or_default(),
1731        }
1732    }
1733
1734    fn extract_simple_gendered_term(
1735        value: &raw::RawTermValue,
1736        plural: bool,
1737    ) -> MaybeGendered<String> {
1738        match value {
1739            raw::RawTermValue::Simple(value) => MaybeGendered::Plain(value.clone()),
1740            raw::RawTermValue::Gendered {
1741                masculine,
1742                feminine,
1743                neuter,
1744                common,
1745            } => MaybeGendered::Gendered {
1746                masculine: masculine.clone(),
1747                feminine: feminine.clone(),
1748                neuter: neuter.clone(),
1749                common: common.clone(),
1750            },
1751            raw::RawTermValue::SingularPlural {
1752                singular,
1753                plural: plural_value,
1754            } => {
1755                if plural {
1756                    Self::from_raw_gendered_string(plural_value)
1757                } else {
1758                    Self::from_raw_gendered_string(singular)
1759                }
1760            }
1761            raw::RawTermValue::Forms(forms) => {
1762                let key = if plural { "plural" } else { "singular" };
1763                forms
1764                    .get(key)
1765                    .or_else(|| forms.get("long"))
1766                    .map(Self::extract_maybe_gendered_string)
1767                    .unwrap_or_default()
1768            }
1769        }
1770    }
1771
1772    /// Apply a partial override, merging its fields into this locale.
1773    ///
1774    /// Performs key-by-key insertion or replacement for:
1775    /// - `messages`: new or updated message IDs
1776    /// - `grammar_options`: if `Some`, replaces the entire block and syncs
1777    ///   `punctuation_in_quote` field
1778    /// - `legacy_term_aliases`: new or updated term aliases
1779    pub fn apply_override(&mut self, ov: &LocaleOverride) {
1780        for (k, v) in &ov.messages {
1781            self.messages.insert(k.clone(), v.clone());
1782        }
1783        if let Some(go) = &ov.grammar_options {
1784            self.grammar_options = go.clone();
1785            self.punctuation_in_quote = go.punctuation_in_quote;
1786        }
1787        for (k, v) in &ov.legacy_term_aliases {
1788            self.legacy_term_aliases.insert(k.clone(), v.clone());
1789        }
1790    }
1791}
1792
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing,
    clippy::todo,
    clippy::unimplemented,
    clippy::unreachable,
    clippy::get_unwrap,
    reason = "Panicking is acceptable and often desired in tests."
)]
mod tests {
    // Unit tests for locale loading, term lookup, overrides, and vocab lookup.
    use super::*;

    /// Load and parse the es-ES locale that is embedded in the crate.
    fn embedded_es_es() -> Locale {
        let bytes = crate::embedded::get_locale_bytes("es-ES").expect("es-ES should be embedded");
        let yaml = std::str::from_utf8(bytes).expect("embedded locale should be utf-8");
        Locale::from_yaml_str(yaml).expect("embedded es-ES should parse")
    }

    /// The built-in en-US locale exposes its ID and the basic list terms.
    #[test]
    fn test_en_us_locale() {
        let en = Locale::en_us();
        assert_eq!(en.locale, "en-US");
        assert_eq!(en.and_term(false), "and");
        assert_eq!(en.and_term(true), "&");
        assert_eq!(en.et_al(), "et al.");
    }

    /// Month names resolve in both long and short form for en-US.
    #[test]
    fn test_month_names() {
        let en = Locale::en_us();
        assert_eq!(en.month_name(1, false), "January");
        assert_eq!(en.month_name(1, true), "Jan.");
        assert_eq!(en.month_name(12, false), "December");
    }

    /// Role terms resolve for short singular, short plural, and verb forms.
    #[test]
    fn test_role_terms() {
        let en = Locale::en_us();

        let editor_singular = en.role_term(&ContributorRole::Editor, false, &TermForm::Short, None);
        assert_eq!(editor_singular, Some("ed."));

        let editor_plural = en.role_term(&ContributorRole::Editor, true, &TermForm::Short, None);
        assert_eq!(editor_plural, Some("eds."));

        let translator_verb =
            en.role_term(&ContributorRole::Translator, false, &TermForm::Verb, None);
        assert_eq!(translator_verb, Some("translated by"));
    }

    /// The en-US no-date term has distinct long and short forms.
    #[test]
    fn test_no_date_term_resolves_long_and_short_forms() {
        let en = Locale::en_us();

        assert_eq!(
            en.general_term(&GeneralTerm::NoDate, &TermForm::Long, None),
            Some("no date")
        );
        assert_eq!(
            en.general_term(&GeneralTerm::NoDate, &TermForm::Short, None),
            Some("n.d.")
        );
    }

    /// With only the legacy `terms.no_date` field set, both forms return it.
    #[test]
    fn test_no_date_term_falls_back_to_legacy_short_form() {
        let mut locale = Locale::default();
        locale.terms.no_date = Some("n.d.".to_owned());

        assert_eq!(
            locale.general_term(&GeneralTerm::NoDate, &TermForm::Short, None),
            Some("n.d.")
        );
        assert_eq!(
            locale.general_term(&GeneralTerm::NoDate, &TermForm::Long, None),
            Some("n.d.")
        );
    }

    /// A locale deserializes directly from JSON.
    #[test]
    fn test_locale_deserialization() {
        let json = r#"{
            "locale": "en-US",
            "dates": {
                "months": {
                    "long": ["January", "February", "March", "April", "May", "June",
                             "July", "August", "September", "October", "November", "December"],
                    "short": ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
                },
                "seasons": ["Spring", "Summer", "Autumn", "Winter"]
            },
            "roles": {},
            "terms": {
                "and": "and",
                "et-al": "et al."
            }
        }"#;

        let locale: Locale = serde_json::from_str(json).unwrap();
        assert_eq!(locale.locale, "en-US");
        assert_eq!(locale.dates.months.long[0], "January");
        assert_eq!(locale.terms.and.as_deref(), Some("and"));
    }

    /// A German locale loads from YAML with its terms and month names.
    #[test]
    fn test_yaml_locale_loading() {
        let yaml = r#"
locale: de-DE
dates:
  months:
    long:
      - Januar
      - Februar
      - März
      - April
      - Mai
      - Juni
      - Juli
      - August
      - September
      - Oktober
      - November
      - Dezember
    short:
      - Jan.
      - Feb.
      - März
      - Apr.
      - Mai
      - Juni
      - Juli
      - Aug.
      - Sep.
      - Okt.
      - Nov.
      - Dez.
  seasons:
    - Frühling
    - Sommer
    - Herbst
    - Winter
terms:
  and:
    long: und
    symbol: "&"
  et_al:
    long: "u. a."
"#;

        let de = Locale::from_yaml_str(yaml).unwrap();
        assert_eq!(de.locale, "de-DE");
        assert_eq!(de.and_term(false), "und");
        assert_eq!(de.et_al(), "u. a.");
        assert_eq!(de.month_name(1, false), "Januar");
        assert_eq!(de.month_name(3, false), "März");
    }

    /// A YAML `no date` term keeps both forms and fills the legacy field.
    #[test]
    fn test_yaml_no_date_term_preserves_long_and_short_forms() {
        let yaml = r#"
locale: en-US
dates:
  months:
    long: [January, February, March, April, May, June, July, August, September, October, November, December]
    short: [Jan., Feb., Mar., Apr., May, June, July, Aug., Sept., Oct., Nov., Dec.]
  seasons: [Spring, Summer, Autumn, Winter]
roles: {}
terms:
  no date:
    long: no date
    short: n.d.
"#;

        let locale = Locale::from_yaml_str(yaml).unwrap();
        assert_eq!(
            locale.general_term(&GeneralTerm::NoDate, &TermForm::Long, None),
            Some("no date")
        );
        assert_eq!(
            locale.general_term(&GeneralTerm::NoDate, &TermForm::Short, None),
            Some("n.d.")
        );
        assert_eq!(locale.terms.no_date.as_deref(), Some("n.d."));
    }

    /// v2 locale with grammar-options overrides punctuation_in_quote correctly.
    #[test]
    fn test_v2_grammar_options_sync_punctuation_in_quote() {
        let yaml = r#"
locale-schema-version: "2"
locale: en-GB
grammar-options:
  punctuation-in-quote: false
"#;
        let locale = Locale::from_yaml_str(yaml).unwrap();
        // grammar_options is the authoritative source for v2 locales
        assert!(!locale.grammar_options.punctuation_in_quote);
        // legacy field is synced from grammar_options
        assert!(!locale.punctuation_in_quote);
    }

    /// v1 locale (no grammar-options) derives punctuation_in_quote from locale ID.
    #[test]
    fn test_v1_locale_derives_punctuation_from_locale_id() {
        let yaml = r#"
locale: en-US
"#;
        let locale = Locale::from_yaml_str(yaml).unwrap();
        // en-US uses American style (inside)
        assert!(locale.punctuation_in_quote);
        assert!(locale.grammar_options.punctuation_in_quote);
    }

    /// apply_override merges messages key-by-key into the base locale.
    #[test]
    fn test_apply_override_merges_messages() {
        let mut locale = Locale::en_us();
        locale
            .messages
            .insert("term.page-label".into(), "p.".into());

        let page_override = LocaleOverride {
            messages: [("term.page-label".into(), "pg.".into())].into(),
            ..Default::default()
        };
        locale.apply_override(&page_override);

        assert_eq!(
            locale.messages.get("term.page-label").map(|s| s.as_str()),
            Some("pg.")
        );
    }

    /// apply_override with grammar_options replaces block and syncs punctuation_in_quote.
    #[test]
    fn test_apply_override_grammar_options_syncs_punctuation() {
        let mut locale = Locale::en_us();
        locale.punctuation_in_quote = false;

        let grammar_override = LocaleOverride {
            grammar_options: Some(GrammarOptions {
                punctuation_in_quote: true,
                ..Default::default()
            }),
            ..Default::default()
        };
        locale.apply_override(&grammar_override);

        assert!(locale.punctuation_in_quote);
        assert!(locale.grammar_options.punctuation_in_quote);
    }

    /// The resolved page locator term differs between singular and plural.
    #[test]
    fn test_resolved_locator_term_evaluates_plural_message() {
        let en = Locale::en_us();

        assert_eq!(
            en.resolved_locator_term(&LocatorType::Page, false, &TermForm::Short, None),
            Some("p.".to_owned())
        );
        assert_eq!(
            en.resolved_locator_term(&LocatorType::Page, true, &TermForm::Short, None),
            Some("pp.".to_owned())
        );
    }

    /// A custom locator uses the form the locale defines, and an undefined
    /// custom locator resolves to its raw key.
    #[test]
    fn test_resolved_locator_term_falls_back_to_custom_locale_form_then_raw_key() {
        let locale = Locale::from_yaml_str(
            r#"
locale: en-US
locators:
  reel:
    long:
      singular: "reel"
      plural: "reels"
"#,
        )
        .expect("custom locale should parse");

        assert_eq!(
            locale.resolved_locator_term(
                &LocatorType::Custom("reel".to_owned()),
                false,
                &TermForm::Short,
                None,
            ),
            Some("reel".to_owned())
        );
        assert_eq!(
            locale.resolved_locator_term(
                &LocatorType::Custom("movement".to_owned()),
                false,
                &TermForm::Short,
                None,
            ),
            Some("movement".to_owned())
        );
    }

    /// Locator terms declared under `terms` are still used for locators.
    #[test]
    fn test_legacy_locator_terms_under_terms_still_populate_locators() {
        let locale = Locale::from_yaml_str(
            r#"
locale: en-US
terms:
  page:
    short:
      singular: "pg."
      plural: "pgs."
"#,
        )
        .expect("legacy locator terms should parse");

        assert_eq!(
            locale.resolved_locator_term(&LocatorType::Page, false, &TermForm::Short, None),
            Some("pg.".to_owned())
        );
    }

    /// An explicit `locators` entry wins over the same key under `terms`.
    #[test]
    fn test_explicit_locators_override_legacy_terms_for_builtin_keys() {
        let locale = Locale::from_yaml_str(
            r#"
locale: en-US
terms:
  page:
    short:
      singular: "pg."
      plural: "pgs."
locators:
  page:
    short:
      singular: "p."
      plural: "pp."
"#,
        )
        .expect("mixed locator forms should parse");

        assert_eq!(
            locale.resolved_locator_term(&LocatorType::Page, false, &TermForm::Short, None),
            Some("p.".to_owned())
        );
    }

    /// A general term under `terms` does not become a custom locator.
    #[test]
    fn test_non_locator_terms_are_not_reclassified_as_custom_locators() {
        let locale = Locale::from_yaml_str(
            r#"
locale: en-US
terms:
  and:
    long: "und"
"#,
        )
        .expect("general terms should parse");

        assert_eq!(locale.terms.and.as_deref(), Some("und"));

        let and_as_locator = LocatorType::Custom("and".to_owned());
        assert!(!locale.locators.contains_key(&and_as_locator));
    }

    /// The resolved editor role term differs between singular and plural.
    #[test]
    fn test_resolved_role_term_evaluates_plural_message() {
        let en = Locale::en_us();

        assert_eq!(
            en.resolved_role_term(&ContributorRole::Editor, false, &TermForm::Long, None),
            Some("editor".to_owned())
        );
        assert_eq!(
            en.resolved_role_term(&ContributorRole::Editor, true, &TermForm::Long, None),
            Some("editors".to_owned())
        );
    }

    /// Gendered role terms return the slot matching the requested gender,
    /// including the `common` slot.
    #[test]
    fn test_role_term_prefers_common_form_for_mixed_gender_requests() {
        let locale = Locale::from_yaml_str(
            r#"
locale: es-ES
roles:
  editor:
    long:
      singular:
        masculine: editor
        feminine: editora
        common: persona editora
      plural:
        masculine: editores
        feminine: editoras
        common: equipo editorial
    short:
      singular: ed.
      plural: eds.
    verb: editado por
"#,
        )
        .expect("gendered locale should parse");

        assert_eq!(
            locale.role_term(
                &ContributorRole::Editor,
                false,
                &TermForm::Long,
                Some(GrammaticalGender::Feminine),
            ),
            Some("editora")
        );
        assert_eq!(
            locale.role_term(
                &ContributorRole::Editor,
                true,
                &TermForm::Long,
                Some(GrammaticalGender::Common),
            ),
            Some("equipo editorial")
        );
    }

    /// When the long form has no slot for the requested gender, the lookup
    /// returns the legacy `no_date` value instead.
    #[test]
    fn test_no_date_term_falls_back_when_requested_gender_has_no_matching_slot() {
        let locale = Locale::from_yaml_str(
            r#"
locale: es-ES
terms:
  no date:
    long:
      masculine: sin fecha
  no_date: s. f.
"#,
        )
        .expect("locale should parse");

        assert_eq!(
            locale.general_term(
                &GeneralTerm::NoDate,
                &TermForm::Long,
                Some(GrammaticalGender::Common),
            ),
            Some("s. f.")
        );
    }

    /// The embedded es-ES locale parses and resolves a feminine editor term.
    #[test]
    fn test_es_es_locale_is_embedded() {
        let es = embedded_es_es();

        assert_eq!(es.locale, "es-ES");
        assert_eq!(
            es.resolved_role_term(
                &ContributorRole::Editor,
                false,
                &TermForm::Long,
                Some(GrammaticalGender::Feminine),
            ),
            Some("editora".to_owned())
        );
    }

    /// The embedded es-ES locale resolves gendered plural role terms and a
    /// neutral plural editor term.
    #[test]
    fn test_es_es_role_term_resolves_gendered_mf2_message() {
        let es = embedded_es_es();

        assert_eq!(
            es.resolved_role_term(
                &ContributorRole::Editor,
                true,
                &TermForm::Long,
                Some(GrammaticalGender::Masculine),
            ),
            Some("editores".to_owned())
        );
        assert_eq!(
            es.resolved_role_term(
                &ContributorRole::Translator,
                true,
                &TermForm::Long,
                Some(GrammaticalGender::Feminine),
            ),
            Some("traductoras".to_owned())
        );
        assert_eq!(
            es.resolved_role_term_neutral(&ContributorRole::Editor, true, &TermForm::Long),
            Some("equipo editorial".to_owned())
        );
    }

    /// With the message below in place, the resolved term is the one from
    /// the `roles` table rather than the message text.
    #[test]
    fn test_role_term_falls_back_when_mf2_message_cannot_evaluate() {
        let locale = Locale::from_yaml_str(
            r#"
locale: es-ES
evaluation:
  message-syntax: mf2
messages:
  role.editor.label-long: |
    .match {$gender :unknown} {$count :plural}
    when feminine one {editora}
roles:
  editor:
    long:
      singular:
        feminine: editora heredada
      plural:
        feminine: editoras heredadas
"#,
        )
        .expect("locale should parse");

        assert_eq!(
            locale.resolved_role_term(
                &ContributorRole::Editor,
                false,
                &TermForm::Long,
                Some(GrammaticalGender::Feminine),
            ),
            Some("editora heredada".to_owned())
        );
    }

    /// A genre key defined in the locale vocab returns its display string.
    #[test]
    fn test_lookup_genre_known_key() {
        let locale = Locale::from_yaml_str(
            r#"
locale: en-US
vocab:
  genre:
    phd-thesis: "PhD thesis"
"#,
        )
        .unwrap();
        assert_eq!(locale.lookup_genre("phd-thesis"), "PhD thesis");
    }

    /// A medium key defined in the locale vocab returns its display string.
    #[test]
    fn test_lookup_medium_known_key() {
        let locale = Locale::from_yaml_str(
            r#"
locale: en-US
vocab:
  medium:
    television: "Television"
"#,
        )
        .unwrap();
        assert_eq!(locale.lookup_medium("television"), "Television");
    }

    /// An unknown genre key is rendered from the key itself.
    #[test]
    fn test_lookup_genre_fallback() {
        let en = Locale::en_us();
        // Unknown key → title-case first word + spaces
        assert_eq!(en.lookup_genre("unknown-key"), "Unknown key");
    }

    /// The built-in en-US locale resolves genre and medium vocab entries.
    #[test]
    fn test_en_us_locale_uses_embedded_vocab() {
        let en = Locale::en_us();

        assert_eq!(en.lookup_genre("phd-thesis"), "PhD thesis");
        assert_eq!(en.lookup_medium("audio-cd"), "Audio CD");
    }

    /// A minimal YAML locale still resolves the default genre vocab.
    #[test]
    fn test_from_yaml_str_inherits_embedded_vocab_defaults() {
        let locale = Locale::from_yaml_str("locale: en-US\n").unwrap();

        assert_eq!(locale.lookup_genre("phd-thesis"), "PhD thesis");
    }

    /// Overriding one genre entry leaves the medium defaults in place.
    #[test]
    fn test_partial_genre_vocab_override_preserves_medium_defaults() {
        let locale = Locale::from_yaml_str(
            r#"
locale: en-US
vocab:
  genre:
    phd-thesis: "Doctoral dissertation"
"#,
        )
        .unwrap();

        assert_eq!(locale.lookup_genre("phd-thesis"), "Doctoral dissertation");
        assert_eq!(locale.lookup_medium("audio-cd"), "Audio CD");
    }

    /// Overriding one medium entry leaves the genre defaults in place.
    #[test]
    fn test_partial_medium_vocab_override_preserves_genre_defaults() {
        let locale = Locale::from_yaml_str(
            r#"
locale: en-US
vocab:
  medium:
    television: "Broadcast television"
"#,
        )
        .unwrap();

        assert_eq!(locale.lookup_medium("television"), "Broadcast television");
        assert_eq!(locale.lookup_genre("phd-thesis"), "PhD thesis");
    }

    /// A single-word key has its first letter capitalized.
    #[test]
    fn test_kebab_to_display_single_word() {
        assert_eq!(kebab_to_display("video"), "Video");
    }

    /// Hyphens become spaces and only the first letter is capitalized.
    #[test]
    fn test_kebab_to_display_multiple_words() {
        assert_eq!(kebab_to_display("phd-thesis"), "Phd thesis");
        assert_eq!(kebab_to_display("audio-cd"), "Audio cd");
    }
}