web_lang/
lib.rs

1//! This library provides functions to match a set of available languages
2//! to those from the http `Accept-Language` header, urls, or other sources.
3//!
//! Language tags (e.g. `"en-au"`) are never validated,
5//! this crate simply tries to make sense of whatever value it is given,
6//! ignoring any input that it cannot understand, and find the best match
7//! based on a few simple rules.
8//!
9//! This crate is inspired by Django's translation handling.
10//!
11//! # Features
12//!
//! - No unsafe code (`#![forbid(unsafe_code)]`)
14//! - No panics
15//! - Tested; code coverage: 100% (morally)
16//! - No dependencies
17//!
18//! # Examples
19//!
20//! Simply pass an iterable of language tags to find the best match.
21//!
22//! ```
23//! use web_lang::{match_lang, match_accept};
24//!
25//! // match a single language tag
26//! assert_eq!(
27//!     match_lang(
28//!         ["en", "en-au", "de"],
29//!         "en-gb"
30//!     ),
31//!     Some("en")
32//! );
33//!
34//! // match a set of language tags,
35//! // taken from the http `Accept-Language` header
36//! assert_eq!(
37//!     match_accept(
38//!         ["en", "en-au", "de"],
39//!         "de;q=0.5, en-gb;q=0.9, ja;q=0.2, *;q=0.1"
40//!     ),
41//!     Some("en")
42//! );
43//! ```
44//!
45//! Complete example with a custom language enum.
46//!
47//! ```
48//! use web_lang::{Language, match_lang, match_accept};
49//!
50//! #[derive(Copy, Clone, PartialEq, Debug)]
51//! enum MyLanguage {
52//!     English,
53//!     AustralianEnglish,
54//!     German,
55//!     Japanese,
56//! }
57//!
58//! impl Language for MyLanguage {
59//!     fn tag(&self) -> &str {
60//!         match self {
61//!             Self::English => "en",
62//!             Self::AustralianEnglish => "en-au",
63//!             Self::German => "de",
64//!             Self::Japanese => "ja",
65//!         }
66//!     }
67//! }
68//!
69//! const LANGUAGES: &[MyLanguage] = &[
70//!     MyLanguage::English,
71//!     MyLanguage::AustralianEnglish,
72//!     MyLanguage::German,
73//!     MyLanguage::Japanese
74//! ];
75//!
76//! // match a single language tag
77//! assert_eq!(
78//!     match_lang(
79//!         LANGUAGES.iter().copied(),
80//!         "en-gb"
81//!     ),
82//!     Some(MyLanguage::English)
83//! );
84//!
85//! // match a set of language tags,
86//! // taken from the http `Accept-Language` header
87//! assert_eq!(
88//!     match_accept(
89//!         LANGUAGES.iter().copied(),
90//!         "de;q=0.5, en-gb;q=0.9, ja;q=0.2, *;q=0.1"
91//!     ),
92//!     Some(MyLanguage::English)
93//! );
94//! ```
95
96#![forbid(unsafe_code)]
97
/// Preference weight (the "quality") attached to an accepted language.
///
/// Larger values indicate a stronger preference.
#[derive(Debug, Clone, Copy)]
struct Quality(f32);

// Equality and ordering are both defined through `f32::total_cmp`.
// It provides a total order over floats, which is what makes the
// `Eq` and `Ord` implementations below possible.

impl PartialEq for Quality {
    fn eq(&self, other: &Self) -> bool {
        matches!(self.cmp(other), std::cmp::Ordering::Equal)
    }
}

impl Eq for Quality {}

impl PartialOrd for Quality {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Quality {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        f32::total_cmp(&self.0, &other.0)
    }
}

/// Comparison against a bare float uses ordinary `f32` equality
/// on the wrapped value.
impl PartialEq<f32> for Quality {
    fn eq(&self, other: &f32) -> bool {
        let Quality(value) = *self;
        value == *other
    }
}

/// Unwraps the underlying float.
impl From<Quality> for f32 {
    fn from(Quality(value): Quality) -> f32 {
        value
    }
}

/// Wraps a float, accepting finite values only
/// (infinities and NaN are rejected with `Err(())`).
impl TryFrom<f32> for Quality {
    type Error = ();

    fn try_from(f: f32) -> Result<Quality, ()> {
        if !f.is_finite() {
            return Err(());
        }
        Ok(Quality(f))
    }
}

/// Parses a float from text, accepting finite values only.
///
/// Anything that is not a float, as well as infinities and NaN,
/// yields `Err(())`.
impl std::str::FromStr for Quality {
    type Err = ();

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.parse::<f32>() {
            Ok(value) if value.is_finite() => Ok(Quality(value)),
            _ => Err(()),
        }
    }
}
160
161/// An accepted language with a ["quality"](Quality) indication.
162///
163/// This type corresponds to a single item from
164/// the http `Accept-Language` header, for example: `en;q=0.9`.
165#[derive(Clone, PartialEq, Eq, Debug)]
166struct AcceptLanguage<'a> {
167    /// The language tag; for example `en` or `en-au`.
168    ///
169    /// This field is [`None`] for wildcard (`*`).
170    pub language: Option<&'a str>,
171    /// The "quality"; higher values are preferred.
172    pub quality: Quality,
173}
174
175/// Parse a single language with optional ["quality"](Quality)
176/// as found in the http `Accept-Language` header.
177///
178/// This function always returns a result,
179/// even for invalid input.
180fn parse_accept_single(accept: &str) -> AcceptLanguage<'_> {
181    if let Some((language, quality)) = accept.split_once("q=") {
182        let language = language.trim_end();
183        if let Some(language) = language.strip_suffix(';') {
184            let language = language.trim_end();
185            AcceptLanguage {
186                language: if language.starts_with('*') {
187                    None
188                } else {
189                    Some(language)
190                },
191                quality: quality.trim_start().parse().unwrap_or(Quality(0.0)),
192            }
193        } else {
194            // missing ";" separator: "lang-tag-q=1.0"
195            // assume the "q" is part of the language
196            debug_assert!(accept.len() >= language.len() + 2);
197            debug_assert!(&accept[language.len()..(language.len() + 1)] == "q");
198            let language = &accept[..(language.len() + 1)];
199            AcceptLanguage {
200                language: if language.starts_with('*') {
201                    None
202                } else {
203                    Some(language)
204                },
205                quality: quality.trim_start().parse().unwrap_or(Quality(1.0)),
206            }
207        }
208    } else if accept.is_empty() {
209        AcceptLanguage {
210            language: None,
211            quality: Quality(0.0),
212        }
213    } else {
214        AcceptLanguage {
215            language: if accept.starts_with('*') {
216                None
217            } else {
218                Some(accept)
219            },
220            quality: Quality(1.0),
221        }
222    }
223}
224
225/// Parse a list of languages with optional ["qualities"](Quality)
226/// as found in the http `Accept-Language` header.
227///
228/// This function returns a vector sorted by descending quality.
229/// The order of items with identical qualities is preserved.
230///
231/// Note that languages with a quality lower than the wildcard (`*`)
232/// are preserved in the result vector.
233fn parse_accept(accept: &str) -> Vec<AcceptLanguage<'_>> {
234    let mut languages = accept
235        .split(',')
236        .map(|s| s.trim())
237        .filter(|s| !s.is_empty())
238        .map(parse_accept_single)
239        .collect::<Vec<_>>();
240    languages.sort_by_key(|lang| Quality(-lang.quality.0));
241    languages
242}
243
/// Get a fallback language tag.
///
/// Regional Chinese tags are mapped to the tag of the script they use:
/// `zh-cn`, `zh-my` and `zh-sg` fall back to `zh-hans`, while
/// `zh-hk`, `zh-mo` and `zh-tw` fall back to `zh-hant`.
/// Every other tag has no fallback and yields [`None`].
///
/// The lookup ignores ASCII case, consistent with the case-insensitive
/// language matching of this crate, so `zh-CN` resolves exactly like
/// `zh-cn`.
fn fallback(language: &str) -> Option<&'static str> {
    const FALLBACKS: &[(&str, &str)] = &[
        ("zh-cn", "zh-hans"),
        ("zh-hk", "zh-hant"),
        ("zh-mo", "zh-hant"),
        ("zh-my", "zh-hans"),
        ("zh-sg", "zh-hans"),
        ("zh-tw", "zh-hant"),
    ];
    FALLBACKS
        .iter()
        .find(|&&(tag, _)| language.eq_ignore_ascii_case(tag))
        .map(|&(_, target)| target)
}
256
/// Iterate over a language tag and its successively shorter prefixes.
///
/// The first item is the tag itself; every following item drops the last
/// `-`-separated segment of the previous one, e.g. `a-b-c`, `a-b`, `a`.
fn prefixes(language: &str) -> impl Iterator<Item = &str> {
    let mut pending = Some(language);
    std::iter::from_fn(move || {
        let current = pending?;
        // Queue up the text in front of the last dash, if there is one.
        pending = current.rfind('-').map(|dash| &current[..dash]);
        Some(current)
    })
}
263
264/// Get a sequence of language alternatives.
265///
266/// The returned iterator yields the following items.
267///   * The first item is the language itself.
268///   * The next item is a possible language fallback.
269///   * The last items are all other language prefixes,
270///     as returned by [`prefixes()`].
271///
272/// Most languages don't have a fallback, in which case
273/// the result is exactly the same as for [`prefixes()`].
274fn alternatives(language: &str) -> impl Iterator<Item = &str> {
275    std::iter::once(language)
276        .chain(fallback(language))
277        .chain(prefixes(language).skip(1))
278}
279
/// Anything that can be identified by a language tag.
///
/// Implement this trait for your own language type (typically an enum)
/// to use it with the matching functions of this crate.
/// Plain [`&str`] and [`String`] language tags work out of the box.
///
/// # Examples
///
/// ```
/// use web_lang::Language;
///
/// enum MyLanguage {
///     English,
///     AustralianEnglish,
///     German,
///     Japanese,
/// }
///
/// impl Language for MyLanguage {
///     fn tag(&self) -> &str {
///         match self {
///             Self::English => "en",
///             Self::AustralianEnglish => "en-au",
///             Self::German => "de",
///             Self::Japanese => "ja",
///         }
///     }
/// }
/// ```
pub trait Language {
    /// Returns the "tag" of this language, for example `en` or `en-au`.
    fn tag(&self) -> &str;
}
312
313impl Language for &str {
314    fn tag(&self) -> &str {
315        self
316    }
317}
318
319impl Language for String {
320    fn tag(&self) -> &str {
321        self
322    }
323}
324
325/// Tries to match an available language to a single accepted language.
326///
327/// This function can be used to match a language from
328/// a single language tag, for example from a url.
329///
330/// Languages matching is case-insensitive.
331///
332/// ```
333/// use web_lang::match_lang;
334///
335/// assert_eq!(
336///     match_lang(
337///         ["en", "en-au", "de"],
338///         "en-gb"
339///     ),
340///     Some("en")
341/// );
342/// ```
343pub fn match_lang<L: Language>(
344    available: impl IntoIterator<Item = L> + Clone,
345    accept: &str,
346) -> Option<L> {
347    for accept_lang in alternatives(accept) {
348        for avail in available.clone() {
349            if accept_lang.eq_ignore_ascii_case(avail.tag()) {
350                return Some(avail);
351            }
352        }
353    }
354    None
355}
356
357/// Tries to match an available language to a list of accepted languages.
358///
359/// This function expects the list of accepted languages
360/// to be sorted by descending quality.
361///
362/// Languages matching is case-insensitive.
363/// Only accepted languages with positive quality are considered.
364fn match_multi<L: Language>(
365    available: impl IntoIterator<Item = L> + Clone,
366    accepted: &[AcceptLanguage<'_>],
367) -> Option<L> {
368    for accept in accepted {
369        if accept.quality <= Quality(0.0) {
370            return None;
371        }
372        if let Some(accept_lang) = accept.language {
373            let r#match = match_lang(available.clone(), accept_lang);
374            if let Some(language) = r#match {
375                return Some(language);
376            }
377        } else {
378            return None;
379        }
380    }
381    None
382}
383
384/// Tries to match an available language to a set of accepted languages.
385///
386/// This function can be used to match a language from
387/// the http `Accept-Language` header.
388///
389/// Languages matching is case-insensitive.
390/// Only accepted languages with positive quality are considered.
391///
392/// # Examples
393///
394/// ```
395/// use web_lang::match_accept;
396///
397/// assert_eq!(
398///     match_accept(
399///         ["en", "en-au", "de"],
400///         "de;q=0.5, en-gb;q=0.9, ja;q=0.2, *;q=0.1"
401///     ),
402///     Some("en")
403/// );
404/// ```
405pub fn match_accept<L: Language>(
406    available: impl IntoIterator<Item = L> + Clone,
407    accept: &str,
408) -> Option<L> {
409    match_multi(available, &parse_accept(accept))
410}
411
#[cfg(test)]
mod tests {
    use super::{
        alternatives, match_accept, match_lang, parse_accept,
        parse_accept_single, prefixes, AcceptLanguage, Quality,
    };

    /// Small custom language type used to exercise the generic matchers.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum Language {
        English,
        German,
        Japanese,
    }

    impl super::Language for Language {
        fn tag(&self) -> &str {
            match *self {
                Language::English => "en",
                Language::German => "de",
                Language::Japanese => "ja",
            }
        }
    }

    const LANGUAGES: &[Language] =
        &[Language::English, Language::German, Language::Japanese];

    /// Builds the expected parse result for a single header item.
    fn item(language: Option<&str>, quality: f32) -> AcceptLanguage<'_> {
        AcceptLanguage {
            language,
            quality: Quality(quality),
        }
    }

    #[test]
    fn quality_cmp() {
        let (low, mid, high) = (Quality(0.5), Quality(0.6), Quality(0.7));
        assert_eq!(low, Quality(0.5));
        assert_ne!(low, mid);
        assert!(low < mid);
        assert!(high > mid);
        assert!(low <= mid);
        assert!(high >= mid);
        assert!(low >= Quality(0.5));
        assert!(low <= Quality(0.5));
    }

    #[test]
    fn quality_cmp_float() {
        let quality = Quality(0.5);
        assert_eq!(quality, 0.5);
        assert_ne!(quality, 0.6);
    }

    #[test]
    fn quality_clone() {
        let original = Quality(0.5);
        let copy = original.clone();
        assert_eq!(original, copy);
    }

    #[test]
    fn quality_debug() {
        let rendered = format!("{:?}", Quality(0.5));
        assert_eq!(rendered, "Quality(0.5)");
    }

    #[test]
    fn quality_into_float() {
        let value: f32 = Quality(0.5).into();
        assert_eq!(value, 0.5);
    }

    #[test]
    fn quality_from_float() {
        assert_eq!(Quality::try_from(0.5), Ok(Quality(0.5)));
        // Non-finite values must be rejected.
        for invalid in [f32::INFINITY, f32::NEG_INFINITY, f32::NAN] {
            assert_eq!(Quality::try_from(invalid), Err(()));
        }
    }

    #[test]
    fn test_parse_accept_single() {
        let cases = [
            ("en", item(Some("en"), 1.0)),
            ("en;q=0.5", item(Some("en"), 0.5)),
            ("*", item(None, 1.0)),
            ("*x", item(None, 1.0)),
            ("*;q=0.5", item(None, 0.5)),
            ("q=0.5", item(Some("q"), 0.5)),
            ("*q=0.5", item(None, 0.5)),
            ("en;q=x", item(Some("en"), 0.0)),
            ("en;q=inf", item(Some("en"), 0.0)),
            ("en;q=nan", item(Some("en"), 0.0)),
            ("", item(None, 0.0)),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_accept_single(input), expected);
        }
    }

    #[test]
    fn test_parse_accept() {
        let parsed = parse_accept("en, ja;q=0.2, , de;q=0.5, *;q=0.1");
        let expected = vec![
            item(Some("en"), 1.0),
            item(Some("de"), 0.5),
            item(Some("ja"), 0.2),
            item(None, 0.1),
        ];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn test_prefixes() {
        let found: Vec<_> = prefixes("a-b-c").collect();
        assert_eq!(found, vec!["a-b-c", "a-b", "a"]);
    }

    #[test]
    fn test_alternatives() {
        let cases = [
            ("a-b-c", vec!["a-b-c", "a-b", "a"]),
            ("zh-cn", vec!["zh-cn", "zh-hans", "zh"]),
        ];
        for (input, expected) in cases {
            assert_eq!(alternatives(input).collect::<Vec<_>>(), expected);
        }
    }

    #[test]
    fn test_match_lang() {
        let cases = [
            ("en", Some(Language::English)),
            ("en-gb", Some(Language::English)),
            ("de", Some(Language::German)),
            ("de-de", Some(Language::German)),
            ("ja", Some(Language::Japanese)),
            ("ja-ja", Some(Language::Japanese)),
            ("fi", None),
            ("", None),
        ];
        for (accept, expected) in cases {
            assert_eq!(match_lang(LANGUAGES.iter().copied(), accept), expected);
        }
    }

    #[test]
    fn test_match_lang_case() {
        let available = ["DE", "EN", "JA"];
        assert_eq!(match_lang(available, "en"), Some("EN"));
    }

    #[test]
    fn test_match_lang_string() {
        let available = [String::from("de"), String::from("en")];
        assert_eq!(match_lang(available, "en"), Some(String::from("en")));
    }

    #[test]
    fn test_match_accept() {
        let cases = [
            ("en", Some(Language::English)),
            ("en, de, ja", Some(Language::English)),
            ("en-gb, de, ja", Some(Language::English)),
            ("de", Some(Language::German)),
            ("en;q=0.1, de;q=0.9", Some(Language::German)),
            ("", None),
            ("fi", None),
            ("en;q=-1", None),
            ("de;q=0.5, *;q=0.8", None),
        ];
        for (accept, expected) in cases {
            assert_eq!(
                match_accept(LANGUAGES.iter().copied(), accept),
                expected
            );
        }
    }
}