1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
use std::{fmt::Display, ops::Deref, str::FromStr};
use anyhow::ensure;
/// A compact language tag modelled on IETF BCP 47 (for example `en-US`).
///
/// Only the following shape is supported for now:
///
/// * a two-character language code based on ISO 639-1
///   (e.g., `en` for English, `zh` for Chinese, `es` for Spanish),
/// * a `-` hyphen character,
/// * a two-character region (or country) code based on ISO 3166-1 alpha-2
///   (e.g., `US` for United States, `CN` for China, `ES` for Spain).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct LanguageTag {
    // The five raw bytes of the tag, e.g. `b"en-US"`.
    tag: [u8; 5],
}
impl LanguageTag {
    /// The `en-US` tag, built through [`LanguageTag::new_panics`] in a const context.
    pub(crate) const US_ENGLISH: LanguageTag = LanguageTag::new_panics("en-US");
    /// The `pt-BR` tag, built through [`LanguageTag::new_panics`] in a const context.
    pub(crate) const BR_PORTUGUESE: LanguageTag = LanguageTag::new_panics("pt-BR");

    /// Create a [`LanguageTag`] from the content of `tag`.
    ///
    /// A [`LanguageTag`] is similar to the IETF BCP 47 standard. The input is
    /// case-insensitive: the language part is stored in lowercase and the
    /// region part in uppercase, so `"EN-us"` yields `en-US`.
    ///
    /// # Errors
    ///
    /// This function will return an error if the tag does not:
    /// - have a length of exactly 5 bytes,
    /// - have ASCII alphabetic characters at the 1st, 2nd, 4th and 5th
    ///   positions, and
    /// - have a `'-'` at the 3rd position.
    pub fn new(tag: impl AsRef<str>) -> anyhow::Result<LanguageTag> {
        let tag = tag.as_ref();
        // Validate on bytes, exactly like `new_panics`. Since every accepted
        // byte is ASCII, this accepts the same inputs as a per-`char` check.
        let bytes = tag.as_bytes();
        ensure!(
            bytes.len() == 5,
            "language tag {:?} must be exactly 5 bytes long, but is {}",
            tag,
            bytes.len()
        );
        ensure!(
            Self::has_ascii_letters(bytes),
            "language tag {:?} must use ASCII letters for its language and region codes",
            tag
        );
        ensure!(
            bytes[2] == b'-',
            "language tag {:?} must have a '-' as its third character",
            tag
        );
        // SAFETY: `tag` is exactly 5 bytes: two ASCII letters, '-', two ASCII letters.
        Ok(unsafe { Self::new_unchecked(tag) })
    }

    /// Returns a new [`LanguageTag`], using `tag`.
    ///
    /// It is the caller's responsibility to ensure that `tag` is a valid tag.
    /// Because this is a `const fn`, an invalid literal used in a const
    /// context is rejected during compilation.
    ///
    /// # Panics
    ///
    /// This function will panic if the tag does not:
    /// - have a length of exactly 5 bytes,
    /// - have ASCII alphabetic characters at the 1st, 2nd, 4th and 5th
    ///   positions, and
    /// - have a `'-'` at the 3rd position.
    #[must_use]
    pub(crate) const fn new_panics(tag: &str) -> LanguageTag {
        let bytes = tag.as_bytes();
        assert!(
            bytes.len() == 5,
            "language tag must be exactly 5 bytes long"
        );
        assert!(
            Self::has_ascii_letters(bytes),
            "language tag must use ASCII letters for its language and region codes"
        );
        assert!(
            bytes[2] == b'-',
            "language tag must have a '-' as its third character"
        );
        // SAFETY: invariants asserted above.
        unsafe { LanguageTag::new_unchecked(tag) }
    }

    /// Returns `true` if the bytes at indices 0, 1, 3 and 4 are ASCII letters.
    ///
    /// Callers must have checked that `bytes` holds at least 5 bytes;
    /// otherwise the indexing below panics.
    const fn has_ascii_letters(bytes: &[u8]) -> bool {
        bytes[0].is_ascii_alphabetic()
            && bytes[1].is_ascii_alphabetic()
            && bytes[3].is_ascii_alphabetic()
            && bytes[4].is_ascii_alphabetic()
    }

    /// Returns a new [`LanguageTag`] from `tag` without validating it.
    ///
    /// The first two bytes are lowercased and the last two uppercased; the
    /// third byte is stored as-is. Bytes past the fifth are ignored, and a
    /// `tag` shorter than 5 bytes makes the indexing below panic.
    ///
    /// # Safety
    ///
    /// It is the caller's responsibility to ensure that `tag`:
    /// - has a length of exactly 5 bytes,
    /// - has ASCII alphabetic characters at the 1st, 2nd, 4th and 5th
    ///   positions, and
    /// - has a `'-'` at the 3rd position.
    ///
    /// The stored bytes are later reinterpreted as a `str` without any
    /// validation by the `Deref` implementation (and therefore by `Display`),
    /// so storing bytes that are not valid UTF-8 is unsound.
    const unsafe fn new_unchecked(tag: &str) -> LanguageTag {
        let bytes = tag.as_bytes();
        let tag = [
            bytes[0].to_ascii_lowercase(),
            bytes[1].to_ascii_lowercase(),
            bytes[2],
            bytes[3].to_ascii_uppercase(),
            bytes[4].to_ascii_uppercase(),
        ];
        LanguageTag { tag }
    }

    /// Returns the en-US [`LanguageTag`].
    #[must_use]
    pub const fn english() -> Self {
        Self::US_ENGLISH
    }

    /// Returns the pt-BR [`LanguageTag`].
    #[must_use]
    pub const fn portuguese() -> Self {
        Self::BR_PORTUGUESE
    }
}
impl FromStr for LanguageTag {
    type Err = anyhow::Error;

    /// Parses `s` into a [`LanguageTag`].
    ///
    /// This simply forwards to [`LanguageTag::new`], so it accepts and
    /// rejects exactly the same inputs.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::new(s)
    }
}
impl Deref for LanguageTag {
    type Target = str;

    /// Views the stored tag bytes as a string slice, without re-validating them.
    fn deref(&self) -> &str {
        let raw: &[u8] = &self.tag;
        // SAFETY: `self.tag` is required to hold only ASCII bytes, and ASCII
        // is always valid UTF-8.
        unsafe { str::from_utf8_unchecked(raw) }
    }
}
impl Display for LanguageTag {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.deref().fmt(f)
}
}
#[cfg(test)]
mod tests {
    use crate::LanguageTag;

    /// Well-formed tags must be accepted by `LanguageTag::new`.
    #[test]
    fn valid_tags_are_allowed() {
        for candidate in ["en-US", "pt-BR"] {
            assert!(LanguageTag::new(candidate).is_ok());
        }
    }

    /// Whatever the input casing, the tag reads back as lowercase language
    /// plus uppercase region.
    #[test]
    fn case_conversion() {
        for spelling in ["en-us", "EN-us", "EN-US", "en-US"] {
            let parsed = LanguageTag::new(spelling).unwrap();
            assert_eq!(&*parsed, "en-US");
        }
    }
}