1use thiserror::Error;
23
/// Language subtags supported by this module.
///
/// Each variant corresponds to one lowercase two-letter code; `LangCode::as_str` yields that
/// text form.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum LangCode {
    /// `de` (German).
    De,
    /// `en` (English).
    En,
    /// `es` (Spanish).
    Es,
    /// `fr` (French).
    Fr,
    /// `it` (Italian).
    It,
    /// `ja` (Japanese).
    Ja,
    /// `pt` (Portuguese).
    Pt,
    /// `zh` (Chinese).
    Zh,
}
47
48impl LangCode {
49 pub fn as_str(&self) -> &'static str {
51 match self {
52 Self::De => "de",
53 Self::En => "en",
54 Self::Es => "es",
55 Self::Fr => "fr",
56 Self::It => "it",
57 Self::Ja => "ja",
58 Self::Pt => "pt",
59 Self::Zh => "zh",
60 }
61 }
62}
63
/// Region subtags supported by this module.
///
/// Each variant corresponds to one uppercase two-letter code; `RegionCode::as_str` yields that
/// text form.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RegionCode {
    /// `AR` (Argentina).
    Ar,
    /// `AU` (Australia).
    Au,
    /// `BR` (Brazil).
    Br,
    /// `CA` (Canada).
    Ca,
    /// `CL` (Chile).
    Cl,
    /// `CN` (China).
    Cn,
    /// `CO` (Colombia).
    Co,
    /// `DE` (Germany).
    De,
    /// `ES` (Spain).
    Es,
    /// `FR` (France).
    Fr,
    /// `GB` (United Kingdom).
    Gb,
    /// `IT` (Italy).
    It,
    /// `JP` (Japan).
    Jp,
    /// `MX` (Mexico).
    Mx,
    /// `PE` (Peru).
    Pe,
    /// `PT` (Portugal).
    Pt,
    /// `US` (United States).
    Us,
}
106
107impl RegionCode {
108 pub fn as_str(&self) -> &'static str {
110 match self {
111 Self::Ar => "AR",
112 Self::Au => "AU",
113 Self::Br => "BR",
114 Self::Ca => "CA",
115 Self::Cl => "CL",
116 Self::Cn => "CN",
117 Self::Co => "CO",
118 Self::De => "DE",
119 Self::Es => "ES",
120 Self::Fr => "FR",
121 Self::Gb => "GB",
122 Self::It => "IT",
123 Self::Jp => "JP",
124 Self::Mx => "MX",
125 Self::Pe => "PE",
126 Self::Pt => "PT",
127 Self::Us => "US",
128 }
129 }
130}
131
/// A locale tag stored as text: either a language code alone (`es`) or a language code and a
/// region code joined by a hyphen (`es-AR`).
///
/// The inner string is private, so code outside this module can only obtain a `Locale` through
/// the constructors and parsers defined here. The accessors rely on that text always being
/// made of a supported language and, optionally, a supported region.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Locale(String);
139
140impl Locale {
141 pub fn new(language: LangCode, region: Option<RegionCode>) -> Self {
146 let raw = match region {
147 Some(r) => format!("{}-{}", language.as_str(), r.as_str()),
148 None => language.as_str().to_string(),
149 };
150 Self(raw)
151 }
152
153 pub fn language(&self) -> LangCode {
156 let head = self.0.split('-').next().unwrap_or("");
161 parse_language(head).expect("Locale invariant: language always valid")
162 }
163
164 pub fn region(&self) -> Option<RegionCode> {
167 let mut parts = self.0.split('-');
168 let _lang = parts.next();
169 let region = parts.next()?;
170 Some(parse_region(region).expect("Locale invariant: region always valid"))
174 }
175
176 pub fn as_bcp47(&self) -> &str {
180 &self.0
181 }
182
183 pub fn language_only(&self) -> Locale {
186 Self::new(self.language(), None)
187 }
188
189 pub fn is_just_language(&self) -> bool {
191 !self.0.contains('-')
192 }
193
194 pub fn iter_supported() -> impl Iterator<Item = Locale> {
205 const LANGS: &[LangCode] = &[
206 LangCode::De,
207 LangCode::En,
208 LangCode::Es,
209 LangCode::Fr,
210 LangCode::It,
211 LangCode::Ja,
212 LangCode::Pt,
213 LangCode::Zh,
214 ];
215 const REGIONS: &[RegionCode] = &[
216 RegionCode::Ar,
217 RegionCode::Au,
218 RegionCode::Br,
219 RegionCode::Ca,
220 RegionCode::Cl,
221 RegionCode::Cn,
222 RegionCode::Co,
223 RegionCode::De,
224 RegionCode::Es,
225 RegionCode::Fr,
226 RegionCode::Gb,
227 RegionCode::It,
228 RegionCode::Jp,
229 RegionCode::Mx,
230 RegionCode::Pe,
231 RegionCode::Pt,
232 RegionCode::Us,
233 ];
234 LANGS.iter().flat_map(|lang| {
235 std::iter::once(Locale::new(*lang, None)).chain(
236 REGIONS
237 .iter()
238 .map(move |region| Locale::new(*lang, Some(*region))),
239 )
240 })
241 }
242}
243
244impl std::str::FromStr for Locale {
245 type Err = LocaleParseError;
246
247 fn from_str(raw: &str) -> Result<Self, Self::Err> {
248 let trimmed = raw.trim();
249 if trimmed.is_empty() {
250 return Err(LocaleParseError::Empty);
251 }
252 let normalised = trimmed.replace('_', "-");
256 let mut parts = normalised.split('-');
257 let lang_raw = parts.next().unwrap_or(""); let region_raw = parts.next();
259 if parts.next().is_some() {
260 return Err(LocaleParseError::TooManySubtags(trimmed.to_string()));
261 }
262
263 let lang_lower = lang_raw.to_ascii_lowercase();
264 let language = parse_language(&lang_lower)
265 .ok_or_else(|| LocaleParseError::UnknownLanguage(lang_raw.to_string()))?;
266
267 let region = match region_raw {
268 None => None,
269 Some(r) => {
270 let upper = r.to_ascii_uppercase();
271 let region = parse_region(&upper).ok_or_else(|| {
272 LocaleParseError::UnknownRegion(language.as_str().to_string(), r.to_string())
273 })?;
274 Some(region)
275 }
276 };
277
278 Ok(Self::new(language, region))
279 }
280}
281
282impl std::fmt::Display for Locale {
283 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
284 f.write_str(&self.0)
285 }
286}
287
288impl serde::Serialize for Locale {
294 fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
295 s.serialize_str(&self.0)
296 }
297}
298
299impl<'de> serde::Deserialize<'de> for Locale {
300 fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
301 use serde::de::Error;
302 let raw = String::deserialize(d)?;
303 raw.parse().map_err(D::Error::custom)
304 }
305}
306
307fn parse_language(s: &str) -> Option<LangCode> {
308 Some(match s {
309 "de" => LangCode::De,
310 "en" => LangCode::En,
311 "es" => LangCode::Es,
312 "fr" => LangCode::Fr,
313 "it" => LangCode::It,
314 "ja" => LangCode::Ja,
315 "pt" => LangCode::Pt,
316 "zh" => LangCode::Zh,
317 _ => return None,
318 })
319}
320
321fn parse_region(s: &str) -> Option<RegionCode> {
322 Some(match s {
323 "AR" => RegionCode::Ar,
324 "AU" => RegionCode::Au,
325 "BR" => RegionCode::Br,
326 "CA" => RegionCode::Ca,
327 "CL" => RegionCode::Cl,
328 "CN" => RegionCode::Cn,
329 "CO" => RegionCode::Co,
330 "DE" => RegionCode::De,
331 "ES" => RegionCode::Es,
332 "FR" => RegionCode::Fr,
333 "GB" => RegionCode::Gb,
334 "IT" => RegionCode::It,
335 "JP" => RegionCode::Jp,
336 "MX" => RegionCode::Mx,
337 "PE" => RegionCode::Pe,
338 "PT" => RegionCode::Pt,
339 "US" => RegionCode::Us,
340 _ => return None,
341 })
342}
343
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    /// Parses `input`, panicking at the calling test's location if it is rejected.
    #[track_caller]
    fn parsed(input: &str) -> Locale {
        Locale::from_str(input).unwrap()
    }

    /// Parses `input`, panicking at the calling test's location if it is accepted.
    #[track_caller]
    fn rejected(input: &str) -> LocaleParseError {
        Locale::from_str(input).unwrap_err()
    }

    // ---- Accepted inputs -------------------------------------------------------------------

    #[test]
    fn parses_language_only() {
        let locale = parsed("es");
        assert_eq!(locale.language(), LangCode::Es);
        assert_eq!(locale.region(), None);
        assert_eq!(locale.as_bcp47(), "es");
    }

    #[test]
    fn parses_full_locale() {
        let locale = parsed("es-AR");
        assert_eq!(locale.language(), LangCode::Es);
        assert_eq!(locale.region(), Some(RegionCode::Ar));
        assert_eq!(locale.as_bcp47(), "es-AR");
    }

    #[test]
    fn parses_underscore_separator_canonicalises_to_hyphen() {
        assert_eq!(parsed("es_AR").as_bcp47(), "es-AR");
    }

    #[test]
    fn parses_mixed_case_canonicalises() {
        assert_eq!(parsed("ES-ar").as_bcp47(), "es-AR");
    }

    #[test]
    fn parses_with_surrounding_whitespace() {
        assert_eq!(parsed(" es-AR ").as_bcp47(), "es-AR");
    }

    #[test]
    fn parses_pt_br() {
        let locale = parsed("pt-BR");
        assert_eq!(locale.language(), LangCode::Pt);
        assert_eq!(locale.region(), Some(RegionCode::Br));
    }

    // ---- Rejected inputs -------------------------------------------------------------------

    #[test]
    fn empty_string_errors_with_empty_variant() {
        assert_eq!(rejected(""), LocaleParseError::Empty);
    }

    #[test]
    fn whitespace_only_errors_with_empty_variant() {
        assert_eq!(rejected(" "), LocaleParseError::Empty);
    }

    #[test]
    fn unknown_language_errors() {
        match rejected("xx") {
            LocaleParseError::UnknownLanguage(tag) => assert_eq!(tag, "xx"),
            other => panic!("expected UnknownLanguage, got {other:?}"),
        }
    }

    #[test]
    fn unknown_region_for_known_language_errors() {
        match rejected("es-XX") {
            LocaleParseError::UnknownRegion(language, region_tag) => {
                assert_eq!(language, "es");
                assert_eq!(region_tag, "XX");
            }
            other => panic!("expected UnknownRegion, got {other:?}"),
        }
    }

    #[test]
    fn extra_subtags_errors_too_many() {
        match rejected("es-AR-x") {
            LocaleParseError::TooManySubtags(input) => assert_eq!(input, "es-AR-x"),
            other => panic!("expected TooManySubtags, got {other:?}"),
        }
    }

    #[test]
    fn script_subtag_errors_too_many() {
        // A script subtag makes three subtags, which the parser rejects by count alone.
        match rejected("zh-Hant-CN") {
            LocaleParseError::TooManySubtags(_) => {}
            other => panic!("expected TooManySubtags, got {other:?}"),
        }
    }

    #[test]
    fn variant_subtag_errors_too_many() {
        match rejected("de-DE-1996") {
            LocaleParseError::TooManySubtags(_) => {}
            other => panic!("expected TooManySubtags, got {other:?}"),
        }
    }

    #[test]
    fn m49_un_region_code_errors_unknown_region() {
        // Numeric region codes are not in the supported region list.
        match rejected("es-419") {
            LocaleParseError::UnknownRegion(language, region_tag) => {
                assert_eq!(language, "es");
                assert_eq!(region_tag, "419");
            }
            other => panic!("expected UnknownRegion, got {other:?}"),
        }
    }

    // ---- Derived views ---------------------------------------------------------------------

    #[test]
    fn lang_only_trim_drops_region_for_whisper_hint() {
        const CASES: [(&str, &str); 10] = [
            ("es-AR", "es"),
            ("es-MX", "es"),
            ("en-GB", "en"),
            ("en-US", "en"),
            ("pt-BR", "pt"),
            ("pt-PT", "pt"),
            ("zh-CN", "zh"),
            ("ja-JP", "ja"),
            ("es", "es"),
            ("en", "en"),
        ];
        for &(input, expected_iso639_1) in CASES.iter() {
            let locale = parsed(input);
            assert_eq!(
                locale.language().as_str(),
                expected_iso639_1,
                "BCP-47 {input} must trim to ISO-639-1 {expected_iso639_1}"
            );
        }
    }

    #[test]
    fn iter_supported_yields_full_cross_product() {
        let tags: Vec<String> = Locale::iter_supported()
            .map(|locale| locale.as_bcp47().to_owned())
            .collect();
        assert_eq!(
            tags.len(),
            8 * (1 + 17),
            "expected 8 langs × (lang-only + 17 regions) = 144"
        );

        let distinct: std::collections::HashSet<&str> =
            tags.iter().map(String::as_str).collect();
        assert_eq!(distinct.len(), tags.len(), "no duplicates in iter_supported");

        // The lexicographically smallest and largest tags pin both ends of the product.
        assert_eq!(tags.iter().map(String::as_str).min(), Some("de"));
        assert_eq!(tags.iter().map(String::as_str).max(), Some("zh-US"));
    }
}
516
517#[derive(Debug, Clone, Error, PartialEq, Eq)]
521pub enum LocaleParseError {
522 #[error("empty locale string")]
524 Empty,
525 #[error("unsupported language subtag `{0}`")]
527 UnknownLanguage(String),
528 #[error("unsupported region subtag `{1}` for language `{0}`")]
531 UnknownRegion(String, String),
532 #[error("unsupported subtag count: locale `{0}` has more than one region/script subtag")]
536 TooManySubtags(String),
537}
538
539pub const DEFAULT_VOICE_ID: &str = "en-US-AriaNeural";
542
543pub fn default_voice_for_locale(locale: Option<&Locale>) -> &'static str {
552 let Some(loc) = locale else {
553 return DEFAULT_VOICE_ID;
554 };
555 match (loc.language(), loc.region()) {
556 (LangCode::Es, Some(RegionCode::Ar)) => "es-AR-ElenaNeural",
558 (LangCode::Es, Some(RegionCode::Mx)) => "es-MX-DaliaNeural",
559 (LangCode::Es, Some(RegionCode::Es)) => "es-ES-ElviraNeural",
560 (LangCode::Es, Some(RegionCode::Co)) => "es-CO-SalomeNeural",
561 (LangCode::Es, Some(RegionCode::Pe)) => "es-PE-CamilaNeural",
562 (LangCode::Es, Some(RegionCode::Cl)) => "es-CL-CatalinaNeural",
563 (LangCode::Es, Some(RegionCode::Us)) => "es-US-PalomaNeural",
564 (LangCode::Es, _) => "es-MX-DaliaNeural",
565 (LangCode::En, Some(RegionCode::Us)) => "en-US-AriaNeural",
567 (LangCode::En, Some(RegionCode::Gb)) => "en-GB-SoniaNeural",
568 (LangCode::En, Some(RegionCode::Au)) => "en-AU-NatashaNeural",
569 (LangCode::En, Some(RegionCode::Ca)) => "en-CA-ClaraNeural",
570 (LangCode::En, _) => "en-US-AriaNeural",
571 (LangCode::Pt, Some(RegionCode::Br)) => "pt-BR-FranciscaNeural",
573 (LangCode::Pt, Some(RegionCode::Pt)) => "pt-PT-RaquelNeural",
574 (LangCode::Pt, _) => "pt-BR-FranciscaNeural",
575 (LangCode::Fr, Some(RegionCode::Fr)) => "fr-FR-DeniseNeural",
577 (LangCode::Fr, Some(RegionCode::Ca)) => "fr-CA-SylvieNeural",
578 (LangCode::Fr, _) => "fr-FR-DeniseNeural",
579 (LangCode::It, Some(RegionCode::It)) => "it-IT-ElsaNeural",
581 (LangCode::It, _) => "it-IT-ElsaNeural",
582 (LangCode::De, Some(RegionCode::De)) => "de-DE-KatjaNeural",
584 (LangCode::De, _) => "de-DE-KatjaNeural",
585 (LangCode::Ja, Some(RegionCode::Jp)) => "ja-JP-NanamiNeural",
587 (LangCode::Ja, _) => "ja-JP-NanamiNeural",
588 (LangCode::Zh, Some(RegionCode::Cn)) => "zh-CN-XiaoxiaoNeural",
590 (LangCode::Zh, _) => "zh-CN-XiaoxiaoNeural",
591 }
592}