unicode_language/
lib.rs

1include!(concat!(env!("OUT_DIR"), "/data.rs"));
2
3use std::cmp;
4
/// A single language match, as returned by [`detect`].
///
/// All string fields borrow from static data, so cloning a `Match` is cheap.
#[derive(Debug, Clone, PartialEq)]
pub struct Match {
    /// BCP 47 language tag.
    pub tag: &'static str,
    /// English name.
    pub name: &'static str,
    /// Name in native script.
    pub native: &'static str,
    /// Number of codepoints matched.
    pub count: u32,
    /// Score (number of codepoints matched divided by the total).
    pub score: f64,
}
18
19/// Detects language support in a font given a list of Unicode
20/// codepoint ranges.
21///
22/// # Arguments
23///
24/// * `codepoints` - An iterator of codepoint ranges. The iterator
25///   must not contain overlapping ranges and must be sorted in
26///   ascending order.
27/// * `threshold` - The minimum score a language must have to be
28/// returned as a match. Value must be between 0 and 1.
29///
30/// Returns a vector of language matches.
31pub fn detect<T>(codepoints: T, threshold: f64) -> Vec<Match>
32where
33    T: IntoIterator<Item = Range<Codepoint>>,
34{
35    let mut counts = [0; LANGUAGE_COUNT];
36
37    for [input_lower, input_upper] in codepoints {
38        for i in 0..LANGUAGE_COUNT {
39            for [range_lower, range_upper] in RANGES[i] {
40                if input_lower <= *range_upper && *range_lower <= input_upper {
41                    counts[i] += cmp::min(input_upper, *range_upper)
42                        - cmp::max(input_lower, *range_lower)
43                        + 1;
44                }
45
46                if input_upper <= *range_upper {
47                    break;
48                }
49            }
50        }
51    }
52
53    let mut result = Vec::new();
54
55    for i in 0..LANGUAGE_COUNT {
56        let score = counts[i] as f64 / TOTALS[i] as f64;
57        if score >= threshold && counts[i] > 0 {
58            result.push(Match {
59                tag: METADATA[i].tag,
60                name: METADATA[i].name,
61                native: METADATA[i].native_name,
62                count: counts[i],
63                score,
64            });
65        }
66    }
67
68    result.sort_by(|a, b| a.score.partial_cmp(&b.score).unwrap().reverse());
69
70    result
71}
72
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the tag of every match, preserving result order.
    fn tags(matches: &[Match]) -> Vec<&'static str> {
        matches.iter().map(|m| m.tag).collect()
    }

    /// Collects the `(tag, name)` pair of every match, preserving result order.
    fn tags_and_names(matches: &[Match]) -> Vec<(&'static str, &'static str)> {
        matches.iter().map(|m| (m.tag, m.name)).collect()
    }

    #[test]
    fn it_returns_an_empty_array() {
        assert!(detect([], 0.5).is_empty());
    }

    #[test]
    fn it_takes_a_vector() {
        let input = vec![[1, 3]];

        let matches = detect(input, 1.0);
        assert_eq!(tags_and_names(&matches), [("t1", "test1")]);
    }

    #[test]
    fn it_takes_an_array() {
        let input = [[1, 3]];

        let matches = detect(input, 1.0);
        assert_eq!(tags_and_names(&matches), [("t1", "test1")]);
    }

    #[test]
    fn it_returns_an_empty_array_with_an_invalid_codepoint() {
        assert!(detect([[256, 256]], 0.5).is_empty());
    }

    #[test]
    fn it_returns_the_test_language() {
        let matches = detect([[1, 1]], 0.0);
        assert_eq!(tags_and_names(&matches), [("t1", "test1")]);
    }

    #[test]
    fn it_does_not_return_if_threshold_not_met() {
        assert!(detect([[1, 2]], 1.0).is_empty());
    }

    #[test]
    fn it_returns_if_threshold_is_met() {
        let matches = detect([[1, 3]], 1.0);
        assert_eq!(tags_and_names(&matches), [("t1", "test1")]);
    }

    #[test]
    fn it_returns_if_threshold_is_partially_met() {
        let matches = detect([[1, 2]], 0.6);
        assert_eq!(tags_and_names(&matches), [("t1", "test1")]);
    }

    #[test]
    fn it_returns_multiple_languages() {
        let matches = detect([[1, 1], [4, 4]], 0.0);
        assert_eq!(
            tags_and_names(&matches),
            [("t1", "test1"), ("t2", "test2")]
        );
    }

    #[test]
    fn it_returns_overlapping_languages() {
        let matches = detect([[8, 8]], 0.0);
        assert_eq!(
            tags_and_names(&matches),
            [("t4", "test4"), ("t3", "test3")]
        );
    }

    #[test]
    fn it_returns_correct_counts_on_partial_range_matches() {
        let matches = detect([[3, 5]], 0.0);
        let summary: Vec<_> = matches.iter().map(|m| (m.tag, m.name, m.count)).collect();
        assert_eq!(summary, [("t2", "test2", 2), ("t1", "test1", 1)]);
    }

    #[test]
    fn it_returns_sorted_results() {
        let matches = detect([[1, 1], [4, 6]], 0.0);
        assert_eq!(tags(&matches), ["t2", "t1"]);
    }

    #[test]
    fn it_handles_ranges_correctly() {
        let matches = detect([[12, 20]], 0.0);
        assert_eq!(tags(&matches), ["t5"]);
    }
}