1use super::Locale;
14use super::message::Mf2MessageEvaluator;
15use super::raw;
16use super::types::{
17 ContributorTerm, DateTerms, LocaleOverride, LocatorTerm, MaybeGendered, MessageSyntax,
18 MonthNames, SimpleTerm, SingularPlural,
19};
20use crate::citation::LocatorType;
21use crate::template::ContributorRole;
22use std::collections::HashMap;
23use std::sync::Arc;
24
25impl Locale {
26 pub fn from_yaml_str(yaml: &str) -> Result<Self, String> {
32 let raw: raw::RawLocale = serde_yaml::from_str(yaml)
33 .map_err(|e| format!("Failed to parse locale YAML: {}", e))?;
34
35 Ok(Self::from_raw(raw))
36 }
37
38 pub fn load(locale_id: &str, locales_dir: &std::path::Path) -> Self {
41 let extensions = ["yaml", "yml", "json", "cbor"];
42
43 for ext in &extensions {
44 let file_name = format!("{}.{}", locale_id, ext);
45 let file_path = locales_dir.join(&file_name);
46
47 if file_path.exists() {
48 match Self::from_file(&file_path) {
49 Ok(locale) => return locale,
50 Err(e) => {
51 eprintln!(
52 "Warning: Failed to load locale {}.{}: {}",
53 locale_id, ext, e
54 );
55 }
56 }
57 }
58 }
59
60 if locale_id.contains('-') {
61 let base = locale_id.split('-').next().unwrap_or("en");
62 if let Ok(entries) = std::fs::read_dir(locales_dir) {
63 for entry in entries.flatten() {
64 let name = entry.file_name();
65 let name_str = name.to_string_lossy();
66 if (name_str.starts_with(base)
67 && extensions.iter().any(|ext| name_str.ends_with(ext)))
68 && let Ok(locale) = Self::from_file(&entry.path())
69 {
70 return locale;
71 }
72 }
73 }
74 }
75
76 Self::en_us()
77 }
78
79 pub fn from_file(path: &std::path::Path) -> Result<Self, String> {
86 let bytes =
87 std::fs::read(path).map_err(|e| format!("Failed to read locale file: {}", e))?;
88 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("yaml");
89
90 match ext {
91 "cbor" => ciborium::de::from_reader::<raw::RawLocale, _>(std::io::Cursor::new(&bytes))
92 .map(Self::from_raw)
93 .map_err(|e| format!("Failed to parse CBOR locale: {}", e)),
94 "json" => serde_json::from_slice::<raw::RawLocale>(&bytes)
95 .map(Self::from_raw)
96 .map_err(|e| format!("Failed to parse JSON locale: {}", e)),
97 _ => {
98 let content = String::from_utf8_lossy(&bytes);
99 Self::from_yaml_str(&content)
100 }
101 }
102 }
103
104 #[allow(
106 clippy::too_many_lines,
107 reason = "Complex parsing of raw locale data with multiple term types"
108 )]
109 fn from_raw(raw: raw::RawLocale) -> Self {
110 let punctuation_in_quote = raw.locale.starts_with("en-US")
111 || (raw.locale.starts_with("en") && !raw.locale.starts_with("en-GB"));
112
113 let mut locale = Locale::en_us();
114 locale.locale = raw.locale.clone();
115 locale.dates = DateTerms {
116 months: MonthNames {
117 long: raw.dates.months.long,
118 short: raw.dates.months.short,
119 },
120 seasons: raw.dates.seasons,
121 uncertainty_term: raw.dates.uncertainty_term,
122 open_ended_term: raw.dates.open_ended_term,
123 am: raw.dates.am,
124 pm: raw.dates.pm,
125 timezone_utc: raw.dates.timezone_utc,
126 before_era: raw.dates.before_era,
127 ad: raw.dates.ad,
128 bc: raw.dates.bc,
129 bce: raw.dates.bce,
130 ce: raw.dates.ce,
131 };
132 locale.punctuation_in_quote = punctuation_in_quote;
133 locale.sort_articles = Self::default_articles_for_locale(&raw.locale);
134
135 locale.locale_schema_version = raw.locale_schema_version;
136 locale.evaluation = raw.evaluation.unwrap_or_default();
137 locale.messages = raw.messages;
138 locale.date_formats = raw.date_formats;
139 locale.legacy_term_aliases = raw.legacy_term_aliases;
140
141 if let Some(raw_vocab) = raw.vocab {
142 locale.vocab.genre.extend(raw_vocab.genre);
143 locale.vocab.medium.extend(raw_vocab.medium);
144 }
145
146 if let Some(go) = raw.grammar_options {
147 locale.grammar_options = go;
148 } else {
149 locale.grammar_options.punctuation_in_quote = locale.punctuation_in_quote;
150 }
151 locale.punctuation_in_quote = locale.grammar_options.punctuation_in_quote;
152
153 if let Some(nf) = raw.number_formats {
154 locale.number_formats = nf;
155 }
156
157 let explicit_locator_keys: std::collections::HashSet<LocatorType> = raw
158 .locators
159 .keys()
160 .filter_map(|key| Self::parse_builtin_locator_type(key))
161 .collect();
162
163 for (key, value) in &raw.locators {
164 if let Some(locator_type) = Self::parse_locator_type(key) {
165 let locator_term = LocatorTerm {
166 long: Self::extract_singular_plural(value.long.as_ref().as_ref()),
167 short: Self::extract_singular_plural(value.short.as_ref().as_ref()),
168 symbol: Self::extract_singular_plural(value.symbol.as_ref().as_ref()),
169 gender: value.gender.clone(),
170 };
171 locale.locators.insert(locator_type, locator_term);
172 }
173 }
174
175 for (key, value) in &raw.terms {
176 if let Some(locator_type) = Self::parse_builtin_locator_type(key)
177 && !explicit_locator_keys.contains(&locator_type)
178 && let Some(forms) = Self::get_forms(value)
179 {
180 let locator_term = LocatorTerm {
181 long: Self::extract_singular_plural(forms.get("long").as_ref()),
182 short: Self::extract_singular_plural(forms.get("short").as_ref()),
183 symbol: Self::extract_singular_plural(forms.get("symbol").as_ref()),
184 gender: None,
185 };
186 locale.locators.insert(locator_type, locator_term);
187 continue;
188 }
189
190 match key.as_str() {
191 "and" => {
192 if let Some(forms) = Self::get_forms(value) {
193 if let Some(v) = forms.get("long").and_then(|v| v.as_string()) {
194 locale.terms.and = Some(v.to_string());
195 }
196 if let Some(v) = forms.get("symbol").and_then(|v| v.as_string()) {
197 locale.terms.and_symbol = Some(v.to_string());
198 }
199 }
200 }
201 "et_al" => {
202 if let Some(forms) = Self::get_forms(value)
203 && let Some(v) = forms.get("long").and_then(|v| v.as_string())
204 {
205 locale.terms.et_al = Some(v.to_string());
206 }
207 }
208 "and others" | "and_others" => {
209 if let Some(forms) = Self::get_forms(value)
210 && let Some(v) = forms.get("long").and_then(|v| v.as_string())
211 {
212 locale.terms.and_others = Some(v.to_string());
213 }
214 }
215 "accessed" => {
216 if let Some(forms) = Self::get_forms(value)
217 && let Some(v) = forms.get("long").and_then(|v| v.as_string())
218 {
219 locale.terms.accessed = Some(v.to_string());
220 }
221 }
222 "ibid" => {
223 if let Some(forms) = Self::get_forms(value)
224 && let Some(v) = forms.get("long").and_then(|v| v.as_string())
225 {
226 locale.terms.ibid = Some(v.to_string());
227 }
228 }
229 "no date" => {
230 let simple = Self::extract_simple_term_from_raw(value);
231 let short_fallback = simple.short.as_default_str().to_string();
232 locale
233 .terms
234 .general
235 .insert(super::types::GeneralTerm::NoDate, simple);
236 locale.terms.no_date.get_or_insert(short_fallback);
237 }
238 "no_date" => {
239 let simple = Self::extract_simple_term_from_raw(value);
240 locale.terms.no_date = Some(simple.short.as_str().to_string());
241 locale
242 .terms
243 .general
244 .entry(super::types::GeneralTerm::NoDate)
245 .or_insert(simple);
246 }
247 _ => {
248 if let Some(general_term) = Self::parse_general_term(key) {
249 let simple = Self::extract_simple_term_from_raw(value);
250 locale.terms.general.insert(general_term, simple);
251 }
252 }
253 }
254 }
255
256 for (key, role_term) in &raw.roles {
257 if let Some(role) = Self::parse_role_name(key) {
258 let contributor_term = ContributorTerm {
259 singular: Self::extract_simple_term(&role_term.long, &role_term.short, false),
260 plural: Self::extract_simple_term(&role_term.long, &role_term.short, true),
261 verb: Self::extract_verb_term(&role_term.verb, &role_term.verb_short),
262 };
263 locale.roles.insert(role, contributor_term);
264 }
265 }
266
267 locale.evaluator = match locale.evaluation.message_syntax {
268 MessageSyntax::Mf2 => Arc::new(Mf2MessageEvaluator),
269 MessageSyntax::Static => Arc::new(Mf2MessageEvaluator),
270 };
271
272 locale
273 }
274
/// Returns the leading articles to ignore when sorting titles for the
/// language of `locale_id`.
///
/// The language is taken from the first two bytes of `locale_id`
/// (e.g. `"en"` for `"en-US"`). Unknown languages, identifiers shorter than
/// two bytes and identifiers whose second byte is not on a character
/// boundary all yield an empty list.
fn default_articles_for_locale(locale_id: &str) -> Vec<String> {
    // `get` is a checked slice: it never panics on short or non-ASCII input,
    // in which case the whole identifier is matched (and matches nothing).
    let lang = locale_id.get(..2).unwrap_or(locale_id);

    let articles: &[&str] = match lang {
        "en" => &["the", "a", "an"],
        "de" => &["der", "die", "das", "ein", "eine"],
        "fr" => &["le", "la", "les", "l'", "un", "une"],
        "es" => &["el", "la", "los", "las", "un", "una"],
        "it" => &["il", "lo", "la", "i", "gli", "le", "un", "una"],
        "pt" => &["o", "a", "os", "as", "um", "uma"],
        "nl" => &["de", "het", "een"],
        _ => &[],
    };

    articles.iter().map(|article| (*article).to_string()).collect()
}
326
327 fn get_forms(value: &raw::RawTermValue) -> Option<&HashMap<String, raw::RawTermValue>> {
328 match value {
329 raw::RawTermValue::Forms(forms) => Some(forms),
330 _ => None,
331 }
332 }
333
334 fn parse_locator_type(name: &str) -> Option<LocatorType> {
335 LocatorType::from_key(name).ok()
336 }
337
338 fn parse_builtin_locator_type(name: &str) -> Option<LocatorType> {
339 match Self::parse_locator_type(name)? {
340 LocatorType::Custom(_) => None,
341 locator => Some(locator),
342 }
343 }
344
345 fn parse_role_name(name: &str) -> Option<ContributorRole> {
346 match name {
347 "author" => Some(ContributorRole::Author),
348 "chair" => Some(ContributorRole::Chair),
349 "editor" => Some(ContributorRole::Editor),
350 "translator" => Some(ContributorRole::Translator),
351 "director" => Some(ContributorRole::Director),
352 "compiler" => Some(ContributorRole::Composer),
353 "illustrator" => Some(ContributorRole::Illustrator),
354 "collection-editor" => Some(ContributorRole::CollectionEditor),
355 "container-author" => Some(ContributorRole::ContainerAuthor),
356 "editorial-director" => Some(ContributorRole::EditorialDirector),
357 "textual-editor" | "textual_editor" => Some(ContributorRole::TextualEditor),
358 "interviewer" => Some(ContributorRole::Interviewer),
359 "original-author" => Some(ContributorRole::OriginalAuthor),
360 "recipient" => Some(ContributorRole::Recipient),
361 "reviewed-author" => Some(ContributorRole::ReviewedAuthor),
362 "composer" => Some(ContributorRole::Composer),
363 _ => None,
364 }
365 }
366
367 fn extract_singular_plural(value: Option<&&raw::RawTermValue>) -> Option<SingularPlural> {
368 match value {
369 Some(raw::RawTermValue::SingularPlural { singular, plural }) => Some(SingularPlural {
370 singular: Self::from_raw_gendered_string(singular),
371 plural: Self::from_raw_gendered_string(plural),
372 }),
373 Some(raw::RawTermValue::Simple(s)) => Some(SingularPlural {
374 singular: MaybeGendered::Plain(s.clone()),
375 plural: MaybeGendered::Plain(s.clone()),
376 }),
377 Some(raw::RawTermValue::Gendered {
378 masculine,
379 feminine,
380 neuter,
381 common,
382 }) => Some(SingularPlural {
383 singular: MaybeGendered::Gendered {
384 masculine: masculine.clone(),
385 feminine: feminine.clone(),
386 neuter: neuter.clone(),
387 common: common.clone(),
388 },
389 plural: MaybeGendered::Gendered {
390 masculine: masculine.clone(),
391 feminine: feminine.clone(),
392 neuter: neuter.clone(),
393 common: common.clone(),
394 },
395 }),
396 Some(raw::RawTermValue::Forms(forms)) => {
397 let singular = forms
398 .get("singular")
399 .map(Self::extract_maybe_gendered_string);
400 let plural = forms.get("plural").map(Self::extract_maybe_gendered_string);
401
402 singular.map(|s| SingularPlural {
403 plural: plural.unwrap_or_else(|| s.clone()),
404 singular: s,
405 })
406 }
407 _ => None,
408 }
409 }
410
411 fn extract_simple_term(
412 long: &Option<raw::RawTermValue>,
413 short: &Option<raw::RawTermValue>,
414 plural: bool,
415 ) -> SimpleTerm {
416 let long_str = long
417 .as_ref()
418 .map(|v| Self::extract_simple_gendered_term(v, plural))
419 .unwrap_or_default();
420
421 let short_str = short
422 .as_ref()
423 .map(|v| Self::extract_simple_gendered_term(v, plural))
424 .unwrap_or_default();
425
426 SimpleTerm {
427 long: long_str,
428 short: short_str,
429 }
430 }
431
432 fn extract_verb_term(
433 verb: &Option<raw::RawTermValue>,
434 verb_short: &Option<raw::RawTermValue>,
435 ) -> SimpleTerm {
436 let long_str = verb
437 .as_ref()
438 .and_then(|v| v.as_string())
439 .unwrap_or("")
440 .into();
441
442 let short_str = verb_short
443 .as_ref()
444 .and_then(|v| v.as_string())
445 .unwrap_or("")
446 .into();
447
448 SimpleTerm {
449 long: long_str,
450 short: short_str,
451 }
452 }
453
/// Normalizes a term key by turning every underscore and space into a
/// hyphen, so `no_date`, `no date` and `no-date` all compare equal.
fn normalize_term_key(s: &str) -> String {
    s.chars()
        .map(|ch| match ch {
            '_' | ' ' => '-',
            other => other,
        })
        .collect()
}
464
465 pub fn parse_general_term(name: &str) -> Option<super::types::GeneralTerm> {
467 use super::types::GeneralTerm;
468 match Self::normalize_term_key(name).as_str() {
469 "in" => Some(GeneralTerm::In),
470 "accessed" => Some(GeneralTerm::Accessed),
471 "retrieved" => Some(GeneralTerm::Retrieved),
472 "at" => Some(GeneralTerm::At),
473 "from" => Some(GeneralTerm::From),
474 "of" => Some(GeneralTerm::Of),
475 "to" => Some(GeneralTerm::To),
476 "by" => Some(GeneralTerm::By),
477 "no-date" => Some(GeneralTerm::NoDate),
478 "anonymous" => Some(GeneralTerm::Anonymous),
479 "circa" => Some(GeneralTerm::Circa),
480 "available-at" => Some(GeneralTerm::AvailableAt),
481 "ibid" => Some(GeneralTerm::Ibid),
482 "and" => Some(GeneralTerm::And),
483 "et-al" => Some(GeneralTerm::EtAl),
484 "and-others" => Some(GeneralTerm::AndOthers),
485 "forthcoming" => Some(GeneralTerm::Forthcoming),
486 "online" => Some(GeneralTerm::Online),
487 "here" => Some(GeneralTerm::Here),
488 "deposited" => Some(GeneralTerm::Deposited),
489 "review-of" => Some(GeneralTerm::ReviewOf),
490 "original-work-published" => Some(GeneralTerm::OriginalWorkPublished),
491 "personal-communication" => Some(GeneralTerm::PersonalCommunication),
492 "patent" => Some(GeneralTerm::Patent),
493 "volume" => Some(GeneralTerm::Volume),
494 "issue" => Some(GeneralTerm::Issue),
495 "page" => Some(GeneralTerm::Page),
496 "chapter" => Some(GeneralTerm::Chapter),
497 "edition" => Some(GeneralTerm::Edition),
498 "section" => Some(GeneralTerm::Section),
499 _ => None,
500 }
501 }
502
503 fn extract_simple_term_from_raw(value: &raw::RawTermValue) -> SimpleTerm {
504 match value {
505 raw::RawTermValue::Simple(s) => SimpleTerm {
506 long: s.clone().into(),
507 short: s.clone().into(),
508 },
509 raw::RawTermValue::Gendered {
510 masculine,
511 feminine,
512 neuter,
513 common,
514 } => SimpleTerm {
515 long: MaybeGendered::Gendered {
516 masculine: masculine.clone(),
517 feminine: feminine.clone(),
518 neuter: neuter.clone(),
519 common: common.clone(),
520 },
521 short: MaybeGendered::Gendered {
522 masculine: masculine.clone(),
523 feminine: feminine.clone(),
524 neuter: neuter.clone(),
525 common: common.clone(),
526 },
527 },
528 raw::RawTermValue::Forms(forms) => {
529 let long = forms
530 .get("long")
531 .map(Self::extract_maybe_gendered_string)
532 .unwrap_or_default();
533 let short = forms
534 .get("short")
535 .map(Self::extract_maybe_gendered_string)
536 .unwrap_or_else(|| long.clone());
537 SimpleTerm { long, short }
538 }
539 raw::RawTermValue::SingularPlural { singular, .. } => SimpleTerm {
540 long: Self::from_raw_gendered_string(singular),
541 short: Self::from_raw_gendered_string(singular),
542 },
543 }
544 }
545
546 fn from_raw_gendered_string(value: &raw::RawGenderedString) -> MaybeGendered<String> {
547 match value {
548 raw::RawGenderedString::Simple(value) => MaybeGendered::Plain(value.clone()),
549 raw::RawGenderedString::Gendered {
550 masculine,
551 feminine,
552 neuter,
553 common,
554 } => MaybeGendered::Gendered {
555 masculine: masculine.clone(),
556 feminine: feminine.clone(),
557 neuter: neuter.clone(),
558 common: common.clone(),
559 },
560 }
561 }
562
563 fn extract_maybe_gendered_string(value: &raw::RawTermValue) -> MaybeGendered<String> {
564 match value {
565 raw::RawTermValue::Simple(value) => MaybeGendered::Plain(value.clone()),
566 raw::RawTermValue::Gendered {
567 masculine,
568 feminine,
569 neuter,
570 common,
571 } => MaybeGendered::Gendered {
572 masculine: masculine.clone(),
573 feminine: feminine.clone(),
574 neuter: neuter.clone(),
575 common: common.clone(),
576 },
577 raw::RawTermValue::SingularPlural { singular, .. } => {
578 Self::from_raw_gendered_string(singular)
579 }
580 raw::RawTermValue::Forms(forms) => forms
581 .get("long")
582 .or_else(|| forms.get("singular"))
583 .map(Self::extract_maybe_gendered_string)
584 .unwrap_or_default(),
585 }
586 }
587
588 fn extract_simple_gendered_term(
589 value: &raw::RawTermValue,
590 plural: bool,
591 ) -> MaybeGendered<String> {
592 match value {
593 raw::RawTermValue::Simple(value) => MaybeGendered::Plain(value.clone()),
594 raw::RawTermValue::Gendered {
595 masculine,
596 feminine,
597 neuter,
598 common,
599 } => MaybeGendered::Gendered {
600 masculine: masculine.clone(),
601 feminine: feminine.clone(),
602 neuter: neuter.clone(),
603 common: common.clone(),
604 },
605 raw::RawTermValue::SingularPlural {
606 singular,
607 plural: plural_value,
608 } => {
609 if plural {
610 Self::from_raw_gendered_string(plural_value)
611 } else {
612 Self::from_raw_gendered_string(singular)
613 }
614 }
615 raw::RawTermValue::Forms(forms) => {
616 let key = if plural { "plural" } else { "singular" };
617 forms
618 .get(key)
619 .or_else(|| forms.get("long"))
620 .map(Self::extract_maybe_gendered_string)
621 .unwrap_or_default()
622 }
623 }
624 }
625
626 pub fn apply_override(&mut self, ov: &LocaleOverride) {
634 for (k, v) in &ov.messages {
635 self.messages.insert(k.clone(), v.clone());
636 }
637 if let Some(go) = &ov.grammar_options {
638 self.grammar_options = go.clone();
639 self.punctuation_in_quote = go.punctuation_in_quote;
640 }
641 for (k, v) in &ov.legacy_term_aliases {
642 self.legacy_term_aliases.insert(k.clone(), v.clone());
643 }
644 }
645}