unicode_locale_parser/
lang.rs1use crate::constants::{LANG_UND, SEP};
2use crate::errors::ParserError;
3use crate::shared::split_str;
4use crate::subtags::{language_subtag, region_subtag, script_subtag, variant_subtag};
5
6use std::fmt::{self, Write};
7use std::iter::Peekable;
8use std::str::FromStr;
9
/// A parsed Unicode language identifier, split into its subtags.
///
/// The value is plain owned data, so it can be cloned, compared for equality
/// and used as a key in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct UnicodeLanguageIdentifier {
    /// Language subtag. An empty string is rendered as `LANG_UND` by the
    /// `Display` implementation.
    pub language: String,
    /// Script subtag, when one was present.
    pub script: Option<String>,
    /// Region subtag, when one was present.
    pub region: Option<String>,
    /// Variant subtags in the order they were parsed; `None` when there were none.
    pub variants: Option<Vec<String>>,
}
17
18pub fn parse_unicode_language_id(lang_id: &str) -> Result<UnicodeLanguageIdentifier, ParserError> {
43 if lang_id.is_empty() {
45 return Err(ParserError::Missing);
46 }
47
48 parse_unicode_language_id_from_iter(&mut split_str(lang_id).peekable())
49}
50
51pub fn parse_unicode_language_id_from_iter<'a>(
52 iter: &mut Peekable<impl Iterator<Item = &'a str>>,
53) -> Result<UnicodeLanguageIdentifier, ParserError> {
54 let language = if let Some(lang) = iter.next() {
56 language_subtag(lang)?
57 } else {
58 return Err(ParserError::Unexpected);
59 };
60 let language = String::from(language);
61
62 let mut script = None;
64 let mut region = None;
65 let mut variants = vec![];
66 let mut current = 1;
67 while let Some(subtag) = iter.peek() {
68 if current == 1 {
69 if let Ok(script_subtag) = script_subtag(subtag) {
70 script = Some(String::from(script_subtag));
71 current = 2;
72 } else if let Ok(region_subtag) = region_subtag(subtag) {
73 region = Some(String::from(region_subtag));
74 current = 3;
75 } else if let Ok(variant_subtag) = variant_subtag(subtag) {
76 variants.push(String::from(variant_subtag));
77 current = 3;
78 } else {
79 break;
80 }
81 } else if current == 2 {
82 if let Ok(region_subtag) = region_subtag(subtag) {
83 region = Some(String::from(region_subtag));
84 current = 3;
85 } else if let Ok(variant_subtag) = variant_subtag(subtag) {
86 variants.push(String::from(variant_subtag));
87 current = 3;
88 } else {
89 break;
90 }
91 } else if let Ok(variant_subtag) = variant_subtag(subtag) {
92 variants.push(String::from(variant_subtag));
93 } else {
94 break;
95 }
96 iter.next();
97 }
98
99 let variants = if variants.is_empty() {
101 None
102 } else {
103 variants.dedup();
104 Some(variants)
105 };
106
107 Ok(UnicodeLanguageIdentifier {
108 language,
109 script,
110 region,
111 variants,
112 })
113}
114
115impl fmt::Display for UnicodeLanguageIdentifier {
116 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
117 if self.language.is_empty() {
118 f.write_str(LANG_UND)?;
119 } else {
120 self.language.fmt(f)?;
121 }
122 if let Some(ref script) = self.script {
123 f.write_char(SEP)?;
124 script.fmt(f)?;
125 }
126 if let Some(ref region) = self.region {
127 f.write_char(SEP)?;
128 region.fmt(f)?;
129 }
130 if let Some(ref variants) = self.variants {
131 for variant in variants.iter() {
132 f.write_char(SEP)?;
133 variant.fmt(f)?;
134 }
135 }
136 Ok(())
137 }
138}
139
140impl FromStr for UnicodeLanguageIdentifier {
141 type Err = ParserError;
142
143 fn from_str(source: &str) -> Result<Self, Self::Err> {
144 parse_unicode_language_id(source)
145 }
146}
147
#[test]
fn success_parse_unicode_language_id() {
    // (input, language, script, region, variants) for inputs where every
    // field of the result is checked.
    let cases: Vec<(&str, &str, Option<&str>, Option<&str>, Option<Vec<&str>>)> = vec![
        (
            "en-Latn-US-macos-windows-linux",
            "en",
            Some("Latn"),
            Some("US"),
            Some(vec!["macos", "windows", "linux"]),
        ),
        ("en", "en", None, None, None),
        ("en-US", "en", None, Some("US"), None),
        ("en-Latn", "en", Some("Latn"), None, None),
        ("en-macos", "en", None, None, Some(vec!["macos"])),
        ("en-Latn-US", "en", Some("Latn"), Some("US"), None),
        ("root", "", None, None, None),
        ("und-Latn-AT-macos", "", Some("Latn"), Some("AT"), Some(vec!["macos"])),
    ];
    for (input, language, script, region, variants) in cases {
        let id = parse_unicode_language_id(input).unwrap();
        let expected_variants =
            variants.map(|list| list.into_iter().map(String::from).collect::<Vec<String>>());
        assert_eq!(id.language, language, "language of {:?}", input);
        assert_eq!(id.script.as_deref(), script, "script of {:?}", input);
        assert_eq!(id.region.as_deref(), region, "region of {:?}", input);
        assert_eq!(id.variants, expected_variants, "variants of {:?}", input);
    }

    // Underscore-separated input; only language, script and region are checked.
    let underscored = parse_unicode_language_id("en_Latn_US").unwrap();
    assert_eq!(underscored.language, "en");
    assert_eq!(underscored.script, Some("Latn".to_string()));
    assert_eq!(underscored.region, Some("US".to_string()));

    // Display
    for text in vec!["en-Latn-US-macos", "und-Latn-US-macos"] {
        let rendered = format!("{}", parse_unicode_language_id(text).unwrap());
        assert_eq!(text, rendered);
    }

    // PartialEq
    let left = parse_unicode_language_id("en-Latn-US").unwrap();
    let right = parse_unicode_language_id("en-Latn-US").unwrap();
    assert_eq!(left, right);

    // FromStr
    let parsed: UnicodeLanguageIdentifier = "en-Latn-US-macos".parse().unwrap();
    assert_eq!(parsed.language, "en");
    assert_eq!(parsed.script, Some("Latn".to_string()));
    assert_eq!(parsed.region, Some("US".to_string()));
    assert_eq!(parsed.variants, Some(vec!["macos".to_string()]));
    let parsed: UnicodeLanguageIdentifier = "en-Latn-US".parse().unwrap();
    assert_eq!("en-Latn-US", format!("{}", parsed));
}
251
#[test]
fn fail_parse_unicode_language_id() {
    // An empty identifier is reported as missing.
    let error = parse_unicode_language_id("").unwrap_err();
    assert_eq!(ParserError::Missing, error);
}