fluent_langneg/negotiate/
mod.rs

1//! Language Negotiation is a process in which locales from different
2//! sources are filtered and sorted in an effort to produce the best
3//! possible selection of them.
4//!
//! There are multiple language negotiation strategies; the most popular one is
//! described in [RFC4647](https://www.ietf.org/rfc/rfc4647.txt).
//!
//! The algorithm is based on the RFC4647 3.3.2 Extended Filtering algorithm,
//! with several modifications.
10//!
11//! # Example:
12//!
13//! ```
14//! use fluent_langneg::negotiate_languages;
15//! use fluent_langneg::NegotiationStrategy;
16//! use fluent_langneg::convert_vec_str_to_langids_lossy;
17//! use icu_locid::LanguageIdentifier;
18//!
19//! let requested = convert_vec_str_to_langids_lossy(&["pl", "fr", "en-US"]);
20//! let available = convert_vec_str_to_langids_lossy(&["it", "de", "fr", "en-GB", "en_US"]);
21//! let default: LanguageIdentifier = "en-US".parse().expect("Parsing langid failed.");
22//!
23//! let supported = negotiate_languages(
24//!   &requested,
25//!   &available,
26//!   Some(&default),
27//!   NegotiationStrategy::Filtering
28//! );
29//!
30//! let expected = convert_vec_str_to_langids_lossy(&["fr", "en-US", "en-GB"]);
31//! assert_eq!(supported,
32//!            expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>());
33//! ```
34//!
//! # The exact algorithm is custom and consists of a six-level strategy:
36//!
37//! ### 1) Attempt to find an exact match for each requested locale in available locales.
38//!
39//! Example:
40//!
41//! ```text
42//! // [requested] * [available] = [supported]
43//!
44//! ["en-US"] * ["en-US"] = ["en-US"]
45//! ```
46//!
47//! ### 2) Attempt to match a requested locale to an available locale treated as a locale range.
48//!
49//! Example:
50//!
51//! ```text
52//! // [requested] * [available] = [supported]
53//!
54//! ["en-US"] * ["en"] = ["en"]
55//!               ^^
56//!                |-- becomes "en-*-*-*"
57//! ```
58//!
59//! ### 3) Maximize the requested locale to find the best match in available locales.
60//!
61//! This part uses ICU's likelySubtags or similar database.
62//!
63//! Example:
64//!
65//! ```text
66//! // [requested] * [available] = [supported]
67//!
68//! ["en"] * ["en-GB", "en-US"] = ["en-US"]
69//!   ^^       ^^^^^    ^^^^^
70//!    |           |        |
71//!    |           |----------- become "en-*-GB-*" and "en-*-US-*"
72//!    |
73//!    |-- ICU likelySubtags expands it to "en-Latn-US"
74//! ```
75//!
//! ### 4) Attempt to look up a different variant of the same locale.
77//!
78//! Example:
79//!
80//! ```text
81//! // [requested] * [available] = [supported]
82//!
83//! ["ja-JP-win"] * ["ja-JP-mac"] = ["ja-JP-mac"]
84//!   ^^^^^^^^^       ^^^^^^^^^
85//!           |               |-- become "ja-*-JP-mac"
86//!           |
87//!           |----------- replace variant with range: "ja-JP-*"
88//! ```
89//!
//! ### 5) Look up a maximized version of the requested locale, stripped of the region code.
91//!
92//! Example:
93//!
94//! ```text
95//! // [requested] * [available] = [supported]
96//!
97//! ["en-CA"] * ["en-ZA", "en-US"] = ["en-US", "en-ZA"]
98//!   ^^^^^
99//!       |       ^^^^^    ^^^^^
100//!       |           |        |
101//!       |           |----------- become "en-*-ZA-*" and "en-*-US-*"
102//!       |
103//!       |----------- strip region produces "en", then lookup likelySubtag: "en-Latn-US"
104//! ```
105//!
106//!
//! ### 6) Attempt to look up a different region of the same locale.
108//!
109//! Example:
110//!
111//! ```text
112//! // [requested] * [available] = [supported]
113//!
114//! ["en-GB"] * ["en-AU"] = ["en-AU"]
115//!   ^^^^^       ^^^^^
116//!       |           |-- become "en-*-AU-*"
117//!       |
118//!       |----- replace region with range: "en-*"
119//! ```
120//!
121
122use icu_locid::LanguageIdentifier;
123
124#[cfg(not(feature = "cldr"))]
125mod likely_subtags;
126#[cfg(feature = "cldr")]
127use icu_locid_transform::{LocaleExpander, TransformResult};
128#[cfg(not(feature = "cldr"))]
129use likely_subtags::{LocaleExpander, TransformResult};
130
/// Selects how many available locales the negotiation picks for the requested ones.
///
/// The strategy is consumed by [`filter_matches`] and [`negotiate_languages`].
#[derive(PartialEq, Eq, Hash, Debug, Clone, Copy)]
pub enum NegotiationStrategy {
    /// Collect every available locale that matches a requested locale, at every
    /// step of the matching cascade. `negotiate_languages` appends the default
    /// locale if it is not already part of the result.
    Filtering,
    /// Pick at most one available locale per requested locale: the first match
    /// ends the cascade for that request and negotiation moves on to the next
    /// one. `negotiate_languages` appends the default locale if it is not
    /// already part of the result.
    Matching,
    /// Stop the whole negotiation at the first match, so matching contributes at
    /// most one locale. `negotiate_languages` falls back to the default locale
    /// only when nothing matched.
    Lookup,
}
137
/// Compares two optional subtags, optionally treating a missing subtag as a wildcard.
///
/// * `subtag1`, `subtag2` - the subtags to compare.
/// * `as_range1` - when `true`, a `None` in `subtag1` matches anything.
/// * `as_range2` - when `true`, a `None` in `subtag2` matches anything.
///
/// Returns `true` when either wildcard rule applies or the two subtags are equal
/// (two `None`s are always equal, regardless of the range flags).
fn subtag_matches<P: PartialEq>(
    subtag1: &Option<P>,
    subtag2: &Option<P>,
    as_range1: bool,
    as_range2: bool,
) -> bool {
    match (subtag1, subtag2) {
        // A missing subtag on a side that acts as a range accepts any counterpart.
        (None, _) if as_range1 => true,
        (_, None) if as_range2 => true,
        // Otherwise only a strict equality counts.
        (left, right) => left == right,
    }
}
146
147#[inline(always)]
148fn matches(
149    lid1: &LanguageIdentifier,
150    lid2: &LanguageIdentifier,
151    range1: bool,
152    range2: bool,
153) -> bool {
154    ((range1 && lid1.language.is_empty())
155        || (range2 && lid2.language.is_empty())
156        || lid1.language == lid2.language)
157        && subtag_matches(&lid1.script, &lid2.script, range1, range2)
158        && subtag_matches(&lid1.region, &lid2.region, range1, range2)
159        && ((range1 && lid1.variants.is_empty())
160            || (range2 && lid2.variants.is_empty())
161            || lid1.variants == lid2.variants)
162}
163
164pub fn filter_matches<'a, R: 'a + AsRef<LanguageIdentifier>, A: 'a + AsRef<LanguageIdentifier>>(
165    requested: &[R],
166    available: &'a [A],
167    strategy: NegotiationStrategy,
168) -> Vec<&'a A> {
169    let mut lc: Option<LocaleExpander> = None;
170
171    let mut supported_locales = vec![];
172
173    let mut available_locales: Vec<&A> = available.iter().collect();
174
175    macro_rules! test_strategy {
176        ($req:ident, $self_as_range:expr, $other_as_range:expr) => {{
177            let mut match_found = false;
178            available_locales.retain(|locale| {
179                if strategy != NegotiationStrategy::Filtering && match_found {
180                    return true;
181                }
182
183                if matches(locale.as_ref(), &$req, $self_as_range, $other_as_range) {
184                    match_found = true;
185                    supported_locales.push(*locale);
186                    return false;
187                }
188                true
189            });
190
191            if match_found {
192                match strategy {
193                    NegotiationStrategy::Filtering => {}
194                    NegotiationStrategy::Matching => continue,
195                    NegotiationStrategy::Lookup => break,
196                }
197            }
198        }};
199    }
200
201    for req in requested {
202        let req = req.as_ref();
203
204        // 1) Try to find a simple (case-insensitive) string match for the request.
205        test_strategy!(req, false, false);
206
207        // 2) Try to match against the available locales treated as ranges.
208        test_strategy!(req, true, false);
209
210        // Per Unicode TR35, 4.4 Locale Matching, we don't add likely subtags to
211        // requested locales, so we'll skip it from the rest of the steps.
212        if req.language.is_empty() {
213            continue;
214        }
215
216        let mut req = req.to_owned();
217        // 3) Try to match against a maximized version of the requested locale
218        let lc = lc.get_or_insert_with(LocaleExpander::new);
219        if lc.maximize(&mut req) == TransformResult::Modified {
220            test_strategy!(req, true, false);
221        }
222
223        // 4) Try to match against a variant as a range
224        req.variants.clear();
225        test_strategy!(req, true, true);
226
227        // 5) Try to match against the likely subtag without region
228        req.region = None;
229        if lc.maximize(&mut req) == TransformResult::Modified {
230            test_strategy!(req, true, false);
231        }
232
233        // 6) Try to match against a region as a range
234        req.region = None;
235        test_strategy!(req, true, true);
236    }
237
238    supported_locales
239}
240
241pub fn negotiate_languages<
242    'a,
243    R: 'a + AsRef<LanguageIdentifier>,
244    A: 'a + AsRef<LanguageIdentifier> + PartialEq,
245>(
246    requested: &[R],
247    available: &'a [A],
248    default: Option<&'a A>,
249    strategy: NegotiationStrategy,
250) -> Vec<&'a A> {
251    let mut supported = filter_matches(requested, available, strategy);
252
253    if let Some(default) = default {
254        if strategy == NegotiationStrategy::Lookup {
255            if supported.is_empty() {
256                supported.push(default);
257            }
258        } else if !supported.contains(&default) {
259            supported.push(default);
260        }
261    }
262    supported
263}