1use std::cmp::Ordering;
2use std::collections::hash_map::Entry;
3use std::rc::Rc;
4
5use ahash::{HashMap, HashMapExt};
6use icu::collator::{BackwardSecondLevel, CaseLevel, Numeric};
7use icu::{
8 collator::{self, AlternateHandling, CaseFirst, Collator, MaxVariable, Strength},
9 locid::Locale,
10};
11
12use iri_string::types::{IriAbsoluteStr, IriReferenceStr, IriStr, IriString};
13
14use crate::error;
15
16#[derive(Debug, Clone, Eq, PartialEq)]
17pub(crate) struct CollatorQuery {
18 pub(crate) fallback: bool,
19 pub(crate) lang: Option<String>,
20 pub(crate) strength: Strength,
21 pub(crate) max_variable: MaxVariable,
22 pub(crate) alternate: AlternateHandling,
23 pub(crate) backwards: bool,
24 pub(crate) normalization: bool,
25 pub(crate) case_level: bool,
26 pub(crate) case_first: CaseFirst,
27 pub(crate) numeric: bool,
28 }
31
32impl From<CollatorQuery> for collator::CollatorOptions {
33 fn from(query: CollatorQuery) -> Self {
34 let mut options = collator::CollatorOptions::new();
35 options.strength = Some(query.strength);
36 options.alternate_handling = Some(query.alternate);
37 options.case_first = Some(query.case_first);
38 options.max_variable = Some(query.max_variable);
39 options.case_level = Some(if query.case_level {
40 CaseLevel::On
41 } else {
42 CaseLevel::Off
43 });
44 options.numeric = Some(if query.numeric {
45 Numeric::On
46 } else {
47 Numeric::Off
48 });
49 options.backward_second_level = Some({
50 if query.backwards {
51 BackwardSecondLevel::On
52 } else {
53 BackwardSecondLevel::Off
54 }
55 });
56 options
57 }
58}
59
60impl CollatorQuery {
61 fn from_url(url: &IriStr) -> error::Result<Self> {
62 let query = url.query_str().unwrap_or("");
63
64 let mut fallback = None;
65 let mut lang = None;
66 let mut strength = None;
67 let mut max_variable = None;
68 let mut alternate = None;
69 let mut backwards = None;
70 let mut normalization = None;
71 let mut case_level = None;
72 let mut case_first = None;
73 let mut numeric = None;
74 let mut has_unrecognized_key = false;
75
76 for (key, value) in Self::parse_collation_query(query) {
78 match key {
79 "fallback" => {
80 fallback = Some(yes_no_query_parameter(value));
81 }
82 "lang" => {
83 lang = Some(value.to_string());
84 }
85 "strength" => {
86 strength = Some(strength_query_parameter(value));
87 }
88 "maxVariable" => {
89 max_variable = Some(max_variable_query_parameter(value));
90 }
91 "alternate" => {
92 alternate = Some(alternate_query_parameter(value));
93 }
94 "backwards" => {
95 backwards = Some(yes_no_query_parameter(value));
96 }
97 "normalization" => {
98 normalization = Some(yes_no_query_parameter(value));
99 }
100 "caseLevel" => {
101 case_level = Some(yes_no_query_parameter(value));
102 }
103 "caseFirst" => {
104 case_first = Some(case_first_query_parameter(value));
105 }
106 "numeric" => {
107 numeric = Some(yes_no_query_parameter(value));
108 }
109 _ => {
110 has_unrecognized_key = true;
111 }
112 }
113 }
114 let fallback = fallback.unwrap_or(Ok(true)).unwrap_or(true);
115
116 fn unwrap_or_fail<T>(
118 v: Option<Result<T, Unrecognized>>,
119 default: T,
120 fallback: bool,
121 ) -> error::Result<T> {
122 if let Some(v) = v {
123 if let Ok(v) = v {
124 Ok(v)
125 } else if fallback {
126 Ok(default)
127 } else {
128 Err(error::Error::FOCH0002)
129 }
130 } else {
131 Ok(default)
132 }
133 }
134
135 if !fallback && has_unrecognized_key {
137 return Err(error::Error::FOCH0002);
138 }
139
140 Ok(CollatorQuery {
141 fallback,
142 lang: lang.map(|s| s.to_string()),
143 strength: unwrap_or_fail(strength, Strength::Tertiary, fallback)?,
144 max_variable: unwrap_or_fail(max_variable, MaxVariable::Punctuation, fallback)?,
145 alternate: unwrap_or_fail(alternate, AlternateHandling::NonIgnorable, fallback)?,
146 backwards: unwrap_or_fail(backwards, false, fallback)?,
147 normalization: unwrap_or_fail(normalization, false, fallback)?,
148 case_level: unwrap_or_fail(case_level, false, fallback)?,
149 case_first: unwrap_or_fail(case_first, CaseFirst::Off, fallback)?,
150 numeric: unwrap_or_fail(numeric, false, fallback)?,
151 })
152 }
153
154 fn parse_collation_query(s: &str) -> impl Iterator<Item = (&str, &str)> {
155 s.split(';').filter_map(|part| {
159 let mut parts = part.split('=');
160 let key = parts.next()?;
161 let value = parts.next()?;
162 Some((key, value))
163 })
164 }
165}
166
167#[derive(Debug)]
168pub enum Collation {
169 CodePoint,
171 Uca(Box<Collator>),
173 HtmlAscii,
175}
176
177impl Collation {
178 fn new(base_uri: Option<&IriAbsoluteStr>, uri: &IriReferenceStr) -> error::Result<Self> {
179 let uri = if let Some(base_uri) = base_uri {
180 let uri: IriString = uri.resolve_against(base_uri).into();
181 uri
182 } else {
183 let uri: IriString = uri.to_iri().map_err(|_| error::Error::FOCH0002)?.to_owned();
184 uri
185 };
186 if uri.scheme_str() != "http" || uri.authority_str() != Some("www.w3.org") {
187 return Err(error::Error::FOCH0002);
188 }
189 let path = uri.path_str();
190 Ok(match path {
191 "/2005/xpath-functions/collation/codepoint" => Collation::CodePoint,
192 "/2013/collation/UCA" => {
193 let collator_query = CollatorQuery::from_url(&uri)?;
194 Collation::Uca(Box::new(Self::uca_collator(collator_query)?))
195 }
196 "/2005/xpath-functions/collation/html-ascii-case-insensitive" => Collation::HtmlAscii,
197 "/2010/09/qt-fots-catalog/collation/caseblind" => Collation::HtmlAscii,
199 _ => return Err(error::Error::FOCH0002),
200 })
201 }
202
203 fn uca_collator(collator_query: CollatorQuery) -> error::Result<Collator> {
204 let locale = if let Some(lang) = &collator_query.lang {
205 match Locale::try_from_bytes(lang.as_bytes()) {
206 Ok(locale) => locale,
207 Err(_) => {
208 if collator_query.fallback {
209 Locale::UND
211 } else {
212 return Err(error::Error::FOCH0002);
213 }
214 }
215 }
216 } else {
217 Locale::UND
220 };
221
222 let locale = locale.into();
223 let options = collator_query.into();
224
225 Collator::try_new(&locale, options).map_err(|_| error::Error::FOCH0002)
226 }
227
228 pub(crate) fn compare(&self, a: &str, b: &str) -> Ordering {
229 match self {
230 Collation::CodePoint => a.cmp(b),
231 Collation::Uca(collator) => collator.compare(a, b),
232 Collation::HtmlAscii => a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase()),
233 }
234 }
235}
236
237#[derive(Debug)]
238pub(crate) struct Collations {
239 collations: HashMap<String, Rc<Collation>>,
240}
241
242impl Collations {
243 pub(crate) fn new() -> Self {
244 Self {
245 collations: HashMap::new(),
246 }
247 }
248
249 pub(crate) fn load(
250 &mut self,
251 base_uri: Option<&IriAbsoluteStr>,
252 uri: &IriReferenceStr,
253 ) -> error::Result<Rc<Collation>> {
254 match self.collations.entry(uri.to_string()) {
256 Entry::Occupied(entry) => Ok(entry.get().clone()),
257 Entry::Vacant(entry) => {
258 let collation = Collation::new(base_uri, uri)?;
259 Ok(entry.insert(Rc::new(collation)).clone())
260 }
261 }
262 }
263}
264
/// Marker error returned by the query-parameter parsers in this file when a
/// value is not one of the accepted spellings.
struct Unrecognized;
266
267fn yes_no_query_parameter(value: &str) -> Result<bool, Unrecognized> {
268 match value {
269 "yes" => Ok(true),
270 "no" => Ok(false),
271 _ => Err(Unrecognized),
272 }
273}
274
275fn strength_query_parameter(value: &str) -> Result<Strength, Unrecognized> {
276 match value {
277 "primary" | "1" => Ok(Strength::Primary),
278 "secondary" | "2" => Ok(Strength::Secondary),
279 "tertiary" | "3" => Ok(Strength::Tertiary),
280 "quaternary" | "4" => Ok(Strength::Quaternary),
281 "identical" | "5" => Ok(Strength::Identical),
282 _ => Err(Unrecognized),
283 }
284}
285
286fn max_variable_query_parameter(value: &str) -> Result<MaxVariable, Unrecognized> {
287 match value {
288 "space" => Ok(MaxVariable::Space),
289 "punct" => Ok(MaxVariable::Punctuation),
290 "symbol" => Ok(MaxVariable::Symbol),
291 "currency" => Ok(MaxVariable::Currency),
292 _ => Err(Unrecognized),
293 }
294}
295
296fn alternate_query_parameter(value: &str) -> Result<AlternateHandling, Unrecognized> {
297 match value {
298 "non-ignorable" => Ok(AlternateHandling::NonIgnorable),
299 "shifted" => Ok(AlternateHandling::Shifted),
300 _ => Err(Unrecognized),
302 }
303}
304
305fn case_first_query_parameter(value: &str) -> Result<CaseFirst, Unrecognized> {
306 match value {
307 "upper" => Ok(CaseFirst::UpperFirst),
308 "lower" => Ok(CaseFirst::LowerFirst),
309 _ => Err(Unrecognized),
310 }
311}
312
#[cfg(test)]
mod tests {

    use super::*;

    /// An absolute-path reference is resolved against the base's authority.
    #[test]
    fn test_base_url() {
        let base: &IriAbsoluteStr = "http://www.w3.org/".try_into().unwrap();
        let path: &IriReferenceStr = "/2005/xpath-functions/collation/codepoint"
            .try_into()
            .unwrap();
        let url = path.resolve_against(base);
        assert_eq!(
            url.to_string(),
            "http://www.w3.org/2005/xpath-functions/collation/codepoint"
        );
    }

    /// A full URL as the reference replaces the base entirely.
    #[test]
    fn test_base_url_with_full_url() {
        let base: &IriAbsoluteStr = "http://www.another.org/".try_into().unwrap();
        let path: &IriReferenceStr = "http://www.w3.org/2005/xpath-functions/collation/codepoint"
            .try_into()
            .unwrap();
        let url = path.resolve_against(base);
        assert_eq!(
            url.to_string(),
            "http://www.w3.org/2005/xpath-functions/collation/codepoint"
        );
    }

    /// A query-only reference keeps the base path and swaps in the query.
    #[test]
    fn test_base_url_with_just_qs() {
        let base: &IriAbsoluteStr = "http://www.w3.org/2013/collation/UCA".try_into().unwrap();
        let path: &IriReferenceStr = "?lang=foo".try_into().unwrap();
        let url = path.resolve_against(base);
        assert_eq!(
            url.to_string(),
            "http://www.w3.org/2013/collation/UCA?lang=foo"
        );
    }

    /// Every supported key is given explicitly and must be picked up.
    #[test]
    fn test_deserialize_query_string() {
        // `maxVariable=space` uses the key and value spelling the parser
        // actually recognizes, and a non-default value, so the assertion on
        // `max_variable` cannot pass merely because the pair was ignored.
        let url: &IriStr = "http://www.w3.org/2013/collation/UCA?fallback=yes;lang=en;strength=primary;maxVariable=space;alternate=non-ignorable;backwards=no;normalization=no;caseLevel=no;caseFirst=upper;numeric=no".try_into().unwrap();
        let query = CollatorQuery::from_url(url).unwrap();
        assert_eq!(
            query,
            CollatorQuery {
                fallback: true,
                lang: Some("en".to_string()),
                strength: Strength::Primary,
                max_variable: MaxVariable::Space,
                alternate: AlternateHandling::NonIgnorable,
                backwards: false,
                normalization: false,
                case_level: false,
                case_first: CaseFirst::UpperFirst,
                numeric: false,
            }
        )
    }

    /// Keys that are absent take their default values.
    #[test]
    fn test_deserialize_query_string_default() {
        let url: &IriStr = "http://www.w3.org/2013/collation/UCA?lang=en"
            .try_into()
            .unwrap();
        let query = CollatorQuery::from_url(url).unwrap();
        assert_eq!(
            query,
            CollatorQuery {
                fallback: true,
                lang: Some("en".to_string()),
                strength: Strength::Tertiary,
                max_variable: MaxVariable::Punctuation,
                alternate: AlternateHandling::NonIgnorable,
                backwards: false,
                normalization: false,
                case_level: false,
                case_first: CaseFirst::Off,
                numeric: false,
            }
        )
    }

    /// With fallback disabled an unrecognized value is an error.
    #[test]
    fn test_deserialize_query_no_fallback_reject_wrong_value() {
        let url: &IriStr =
            "http://www.w3.org/2013/collation/UCA?lang=en;fallback=no;strength=nonsense"
                .try_into()
                .unwrap();
        assert!(CollatorQuery::from_url(url).is_err());
    }

    /// With fallback disabled an unrecognized key is an error.
    #[test]
    fn test_deserialize_query_no_fallback_reject_extra_param() {
        let url: &IriStr =
            "http://www.w3.org/2013/collation/UCA?lang=en;fallback=no;extra=nonsense"
                .try_into()
                .unwrap();
        assert!(CollatorQuery::from_url(url).is_err());
    }

    /// With fallback enabled an unrecognized value is replaced by the default.
    #[test]
    fn test_deserialize_query_yes_fallback_default_for_wrong_value() {
        let url: &IriStr =
            "http://www.w3.org/2013/collation/UCA?lang=en;fallback=yes;strength=nonsense"
                .try_into()
                .unwrap();
        let query = CollatorQuery::from_url(url).unwrap();
        assert_eq!(
            query,
            CollatorQuery {
                fallback: true,
                lang: Some("en".to_string()),
                strength: Strength::Tertiary,
                max_variable: MaxVariable::Punctuation,
                alternate: AlternateHandling::NonIgnorable,
                backwards: false,
                normalization: false,
                case_level: false,
                case_first: CaseFirst::Off,
                numeric: false,
            }
        )
    }

    /// With fallback enabled an unrecognized key is ignored.
    #[test]
    fn test_deserialize_query_yes_fallback_ignore_extra_parameter() {
        let url: IriString =
            "http://www.w3.org/2013/collation/UCA?lang=en;fallback=yes;extra=nonsense"
                .try_into()
                .unwrap();
        let query = CollatorQuery::from_url(&url).unwrap();
        assert_eq!(
            query,
            CollatorQuery {
                fallback: true,
                lang: Some("en".to_string()),
                strength: Strength::Tertiary,
                max_variable: MaxVariable::Punctuation,
                alternate: AlternateHandling::NonIgnorable,
                backwards: false,
                normalization: false,
                case_level: false,
                case_first: CaseFirst::Off,
                numeric: false,
            }
        )
    }

    /// A UCA collation loads with fallback disabled.
    #[test]
    fn test_load_uca_collation() {
        let mut collations = Collations::new();
        let url: &IriReferenceStr = "http://www.w3.org/2013/collation/UCA?lang=se;fallback=no"
            .try_into()
            .unwrap();
        let collation = collations.load(None, url);
        assert!(collation.is_ok());
    }

    /// A UCA collation loads with fallback enabled.
    #[test]
    fn test_load_uca_collation_fallback() {
        let mut collations = Collations::new();
        let url: &IriReferenceStr = "http://www.w3.org/2013/collation/UCA?lang=en-US;fallback=yes"
            .try_into()
            .unwrap();
        let collation = collations.load(None, url);
        assert!(collation.is_ok());
    }

    /// The codepoint collation URI is accepted.
    #[test]
    fn test_load_codepoint_collation() {
        let mut collations = Collations::new();
        let url: &IriReferenceStr = "http://www.w3.org/2005/xpath-functions/collation/codepoint"
            .try_into()
            .unwrap();
        let collation = collations.load(None, url);
        assert!(collation.is_ok());
    }

    /// The HTML ASCII case-insensitive collation URI is accepted.
    #[test]
    fn test_load_html_ascii_collation() {
        let mut collations = Collations::new();
        let url: &IriReferenceStr =
            "http://www.w3.org/2005/xpath-functions/collation/html-ascii-case-insensitive"
                .try_into()
                .unwrap();
        let collation = collations.load(None, url);
        assert!(collation.is_ok());
    }
}