Skip to main content

rsvg/
accept_language.rs

1//! Parser for an Accept-Language HTTP header.
2
3use language_tags::{LanguageTag, ParseError};
4use locale_config::{LanguageRange, Locale};
5
6use std::error;
7use std::fmt;
8use std::str::FromStr;
9use std::sync::{Arc, OnceLock};
10
11#[cfg(doc)]
12use crate::api::CairoRenderer;
13use crate::rsvg_log;
14use crate::session::Session;
15
/// Used to set the language for rendering.
///
/// SVG documents can use the `<switch>` element, whose children have a `systemLanguage`
/// attribute; only the first child which has a `systemLanguage` that matches the
/// preferred languages will be rendered.
///
/// This enum, used with [`CairoRenderer::with_language`], configures how to obtain the
/// user's preferred languages.
pub enum Language {
    /// Use the Unix environment variables `LANGUAGE`, `LC_ALL`, `LC_MESSAGES` and `LANG` to obtain the
    /// user's language.
    ///
    /// This uses [`g_get_language_names()`][ggln] underneath.
    ///
    /// [ggln]: https://docs.gtk.org/glib/func.get_language_names.html
    FromEnvironment,

    /// Use a list of languages in the form of an HTTP Accept-Language header, like `es, en;q=0.8`.
    ///
    /// This is convenient when you want to select an explicit set of languages, instead of
    /// assuming that the Unix environment has the language you want.
    ///
    /// Build the payload with [`AcceptLanguage::parse`].
    AcceptLanguage(AcceptLanguage),
}
39
/// `Language` but with the environment's locale converted to something we can use.
///
/// This is the form that gets matched against a document's language tags; see
/// [`UserLanguage::any_matches`].
#[derive(Clone)]
pub enum UserLanguage {
    /// An explicit list of language tags.
    LanguageTags(LanguageTags),

    /// A parsed HTTP Accept-Language list.
    AcceptLanguage(AcceptLanguage),

    /// Language tags taken from the environment.
    ///
    /// The cell starts out empty and is filled in by the first call to
    /// [`UserLanguage::any_matches`].  Because the cell sits behind an `Arc`, clones of
    /// this value share the same cell.
    FromEnvironment(Arc<OnceLock<LanguageTags>>),
}
47
/// Quality value (the `q=` parameter) attached to one language of an Accept-Language list.
///
/// `None` means that the item carried no explicit weight.
#[derive(Clone, Debug, PartialEq)]
struct Weight(Option<f32>);

impl Weight {
    /// Returns the weight as a plain number; an item without an explicit weight counts
    /// as `1.0`.
    fn numeric(&self) -> f32 {
        match self.0 {
            Some(value) => value,
            None => 1.0,
        }
    }
}
56
/// One element of an Accept-Language list: a language tag plus its optional weight.
#[derive(Clone, Debug, PartialEq)]
struct Item {
    /// The language tag, i.e. the part of the element before any `;`.
    tag: LanguageTag,
    /// The `q=` weight of the element, if one was given.
    weight: Weight,
}
62
/// Stores a parsed version of an HTTP Accept-Language header.
///
/// The items are kept in the order in which they appear in the header.  Values built
/// through [`AcceptLanguage::parse`] always hold at least one item, since an empty list
/// is reported as an error there.
///
/// RFC 7231: <https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.5>
#[derive(Clone, Debug, PartialEq)]
pub struct AcceptLanguage(Box<[Item]>);
68
/// Errors when parsing an `AcceptLanguage`.
///
/// This type is private; the public `AcceptLanguage::parse` converts it to a `String`.
#[derive(Debug, PartialEq)]
enum AcceptLanguageError {
    /// The list had no elements left after skipping the empty ones.
    NoElements,
    /// The input contained non-ASCII characters.
    InvalidCharacters,
    /// An element's language tag could not be parsed; carries the underlying parse error.
    InvalidLanguageTag(ParseError),
    /// An element had a `;` that was not followed by an acceptable `q=` weight.
    InvalidWeight,
}

// Marker impl so the error can be used wherever a `std::error::Error` is expected.
impl error::Error for AcceptLanguageError {}
79
80impl fmt::Display for AcceptLanguageError {
81    // Skipped for mutation testing.  This is just an error formatter.  In the public API,
82    // `Language::AcceptLanguage(AcceptLanguage)` is constructible via
83    // `AcceptLanguage::parse()`, which returns `Result<Self, String>`.  These strings go
84    // there, but librsvg makes no promises about their contents.  Below, we do have tests
85    // for `AcceptLanguageError` being computed correctly.
86    #[mutants::skip]
87    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88        match self {
89            Self::NoElements => write!(f, "no language tags in list"),
90            Self::InvalidCharacters => write!(f, "invalid characters in language list"),
91            Self::InvalidLanguageTag(e) => write!(f, "invalid language tag: {e}"),
92            Self::InvalidWeight => write!(f, "invalid q= weight"),
93        }
94    }
95}
96
/// Optional whitespace, Space or Tab, per RFC 7230.
///
/// Used as a character-set pattern when trimming list elements and their parameters.
///
/// RFC 7230: <https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.3>
// '\x20' is SP (space), '\x09' is HTAB (horizontal tab).
const OWS: [char; 2] = ['\x20', '\x09'];
101
102impl AcceptLanguage {
103    /// Parses the payload of an HTTP Accept-Language header.
104    ///
105    /// For example, a valid header looks like `es, en;q=0.8`, and means, "I prefer Spanish,
106    /// but will also accept English".
107    ///
108    /// Use this function to construct a [`Language::AcceptLanguage`]
109    /// variant to pass to the [`CairoRenderer::with_language`] function.
110    ///
111    /// See RFC 7231 for details: <https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.5>
112    pub fn parse(s: &str) -> Result<AcceptLanguage, String> {
113        AcceptLanguage::parse_internal(s).map_err(|e| format!("{}", e))
114    }
115
116    /// Internal constructor.  We don't expose [`AcceptLanguageError`] in the public API;
117    /// there we just use a [`String`].
118    fn parse_internal(s: &str) -> Result<AcceptLanguage, AcceptLanguageError> {
119        if !s.is_ascii() {
120            return Err(AcceptLanguageError::InvalidCharacters);
121        }
122
123        let mut items = Vec::new();
124
125        for val in s.split(',') {
126            let trimmed = val.trim_matches(&OWS[..]);
127            if trimmed.is_empty() {
128                continue;
129            }
130
131            items.push(Item::parse(trimmed)?);
132        }
133
134        if items.is_empty() {
135            Err(AcceptLanguageError::NoElements)
136        } else {
137            Ok(AcceptLanguage(items.into_boxed_slice()))
138        }
139    }
140
141    fn iter(&self) -> impl Iterator<Item = (&LanguageTag, f32)> {
142        self.0.iter().map(|item| (&item.tag, item.weight.numeric()))
143    }
144
145    fn any_matches(&self, tag: &LanguageTag) -> bool {
146        self.iter().any(|(self_tag, _weight)| tag.matches(self_tag))
147    }
148}
149
150impl Item {
151    fn parse(s: &str) -> Result<Item, AcceptLanguageError> {
152        let semicolon_pos = s.find(';');
153
154        let (before_semicolon, after_semicolon) = if let Some(semi) = semicolon_pos {
155            (&s[..semi], Some(&s[semi + 1..]))
156        } else {
157            (s, None)
158        };
159
160        let tag = LanguageTag::parse(before_semicolon)
161            .map_err(AcceptLanguageError::InvalidLanguageTag)?;
162
163        let weight = if let Some(quality) = after_semicolon {
164            let quality = quality.trim_start_matches(&OWS[..]);
165
166            let number = if let Some(qvalue) = quality.strip_prefix("q=") {
167                if qvalue.starts_with(&['0', '1'][..]) {
168                    let first_digit = qvalue.chars().next().unwrap();
169
170                    if let Some(decimals) = qvalue[1..].strip_prefix('.') {
171                        if (first_digit == '0'
172                            && decimals.len() <= 3
173                            && decimals.chars().all(|c| c.is_ascii_digit()))
174                            || (first_digit == '1'
175                                && decimals.len() <= 3
176                                && decimals.chars().all(|c| c == '0'))
177                        {
178                            qvalue
179                        } else {
180                            return Err(AcceptLanguageError::InvalidWeight);
181                        }
182                    } else {
183                        qvalue
184                    }
185                } else {
186                    return Err(AcceptLanguageError::InvalidWeight);
187                }
188            } else {
189                return Err(AcceptLanguageError::InvalidWeight);
190            };
191
192            Weight(Some(
193                f32::from_str(number).map_err(|_| AcceptLanguageError::InvalidWeight)?,
194            ))
195        } else {
196            Weight(None)
197        };
198
199        Ok(Item { tag, weight })
200    }
201}
202
/// A list of BCP47 language tags.
///
/// RFC 5646: <https://www.rfc-editor.org/info/rfc5646>
#[derive(Debug, Clone, PartialEq)]
pub struct LanguageTags(Box<[LanguageTag]>);
208
209impl LanguageTags {
210    pub fn empty() -> Self {
211        LanguageTags(Box::new([]))
212    }
213
214    /// Converts a `Locale` to a set of language tags.
215    pub fn from_locale(locale: &Locale) -> Result<LanguageTags, String> {
216        let mut tags = Vec::new();
217
218        for locale_range in locale.tags_for("messages") {
219            if locale_range == LanguageRange::invariant() {
220                continue;
221            }
222
223            let str_locale_range = locale_range.as_ref();
224
225            let locale_tag = LanguageTag::from_str(str_locale_range).map_err(|e| {
226                format!("invalid language tag \"{str_locale_range}\" in locale: {e}")
227            })?;
228
229            if !locale_tag.is_language_range() {
230                return Err(format!(
231                    "language tag \"{locale_tag}\" is not a language range"
232                ));
233            }
234
235            tags.push(locale_tag);
236        }
237
238        Ok(LanguageTags(Box::from(tags)))
239    }
240
241    pub fn from(tags: Vec<LanguageTag>) -> LanguageTags {
242        LanguageTags(Box::from(tags))
243    }
244
245    pub fn iter(&self) -> impl Iterator<Item = &LanguageTag> {
246        self.0.iter()
247    }
248
249    pub fn any_matches(&self, language_tag: &LanguageTag) -> bool {
250        self.0.iter().any(|tag| tag.matches(language_tag))
251    }
252}
253
254impl UserLanguage {
255    pub fn new(language: &crate::api::Language) -> UserLanguage {
256        match *language {
257            crate::api::Language::FromEnvironment => {
258                UserLanguage::FromEnvironment(Arc::new(OnceLock::new()))
259            }
260
261            crate::api::Language::AcceptLanguage(ref a) => UserLanguage::AcceptLanguage(a.clone()),
262        }
263    }
264
265    pub fn any_matches(&self, tags: &LanguageTags, session: &Session) -> bool {
266        match *self {
267            UserLanguage::LanguageTags(ref language_tags) => {
268                tags.iter().any(|tag| language_tags.any_matches(tag))
269            }
270            UserLanguage::AcceptLanguage(ref accept_language) => {
271                tags.iter().any(|tag| accept_language.any_matches(tag))
272            }
273            UserLanguage::FromEnvironment(ref once_lock) => {
274                let language_tags =
275                    once_lock.get_or_init(|| get_language_tags_from_environment(session));
276                tags.iter().any(|tag| language_tags.any_matches(tag))
277            }
278        }
279    }
280}
281
282/// Gets language tags from the environment.
283///
284/// This function is thread-safe.
285fn get_language_tags_from_environment(session: &Session) -> LanguageTags {
286    LanguageTags::from_locale(&locale_from_environment())
287        .map_err(|s| {
288            rsvg_log!(session, "could not convert locale to language tags: {}", s);
289        })
290        .unwrap_or_else(|_| LanguageTags::empty())
291}
292
293/// Gets the user's preferred locale from the environment and
294/// translates it to a `Locale` with `LanguageRange` fallbacks.
295fn locale_from_environment() -> Locale {
296    let mut locale = Locale::invariant();
297
298    for name in glib::language_names() {
299        let name = name.as_str();
300        if let Ok(range) = LanguageRange::from_unix(name) {
301            locale.add(&range);
302        }
303    }
304
305    locale
306}
307
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `Item` expected for `tag` with the given optional weight.
    fn item(tag: &str, weight: Option<f32>) -> Item {
        Item {
            tag: LanguageTag::parse(tag).unwrap(),
            weight: Weight(weight),
        }
    }

    /// Asserts that `input` parses successfully into exactly `expected`, in order.
    #[track_caller]
    fn assert_parses_to(input: &str, expected: Vec<Item>) {
        assert_eq!(
            AcceptLanguage::parse_internal(input).unwrap(),
            AcceptLanguage(expected.into_boxed_slice())
        );
    }

    /// Parses `input`, which must fail, and returns the error.
    #[track_caller]
    fn parse_error(input: &str) -> AcceptLanguageError {
        AcceptLanguage::parse_internal(input).unwrap_err()
    }

    #[test]
    fn parses_accept_language() {
        // plain tag
        assert_parses_to("es-MX", vec![item("es-MX", None)]);

        // with quality
        assert_parses_to("es-MX;q=1", vec![item("es-MX", Some(1.0))]);
        assert_parses_to("es-MX;q=0", vec![item("es-MX", Some(0.0))]);

        // zero decimals are allowed
        assert_parses_to("es-MX;q=0.", vec![item("es-MX", Some(0.0))]);
        assert_parses_to("es-MX;q=1.", vec![item("es-MX", Some(1.0))]);

        // one decimal
        assert_parses_to("es-MX;q=1.0", vec![item("es-MX", Some(1.0))]);

        // two decimals
        assert_parses_to("es-MX;q=1.00", vec![item("es-MX", Some(1.0))]);

        // three decimals
        assert_parses_to("es-MX;q=1.000", vec![item("es-MX", Some(1.0))]);

        // multiple elements
        assert_parses_to(
            "es-MX, en; q=0.5",
            vec![item("es-MX", None), item("en", Some(0.5))],
        );

        // superfluous whitespace
        assert_parses_to(
            ",es-MX;q=1.000  , en; q=0.125  ,  ,",
            vec![item("es-MX", Some(1.0)), item("en", Some(0.125))],
        );
    }

    #[test]
    fn empty_lists() {
        assert_eq!(parse_error(""), AcceptLanguageError::NoElements);
        assert_eq!(parse_error(","), AcceptLanguageError::NoElements);
        assert_eq!(parse_error(", , ,,,"), AcceptLanguageError::NoElements);
    }

    #[test]
    fn ascii_only() {
        assert_eq!(parse_error("ës"), AcceptLanguageError::InvalidCharacters);
    }

    #[test]
    fn invalid_tag() {
        assert!(matches!(
            parse_error("no_underscores"),
            AcceptLanguageError::InvalidLanguageTag(_)
        ));
    }

    #[test]
    fn invalid_weight() {
        assert_eq!(parse_error("es;"), AcceptLanguageError::InvalidWeight);
        assert_eq!(parse_error("es;q"), AcceptLanguageError::InvalidWeight);
        assert_eq!(parse_error("es;q="), AcceptLanguageError::InvalidWeight);
        assert_eq!(parse_error("es;q=2"), AcceptLanguageError::InvalidWeight);
        assert_eq!(parse_error("es;q=1.1"), AcceptLanguageError::InvalidWeight);
        assert_eq!(parse_error("es;q=1.12"), AcceptLanguageError::InvalidWeight);
        assert_eq!(parse_error("es;q=1.123"), AcceptLanguageError::InvalidWeight);

        // Up to three decimals allowed per RFC 7231
        assert_eq!(parse_error("es;q=0.1234"), AcceptLanguageError::InvalidWeight);
    }

    #[test]
    fn iter() {
        let accept_language = AcceptLanguage::parse_internal("es-MX, en; q=0.5").unwrap();

        let es_mx = LanguageTag::parse("es-MX").unwrap();
        let en = LanguageTag::parse("en").unwrap();

        // Collecting checks the order, the weights, and that nothing follows the
        // second element.
        let pairs: Vec<(&LanguageTag, f32)> = accept_language.iter().collect();
        assert_eq!(pairs, vec![(&es_mx, 1.0), (&en, 0.5)]);
    }

    #[test]
    fn matches() {
        let accept_language = AcceptLanguage::parse_internal("es-MX, en; q=0.5").unwrap();
        let matches_tag = |tag: &str| accept_language.any_matches(&LanguageTag::parse(tag).unwrap());

        assert!(matches_tag("es-MX"));
        assert!(matches_tag("en"));

        assert!(!matches_tag("fr"));
    }
}