1use std::collections::BTreeMap;
4
5use interavl::IntervalTree;
6
7include!(concat!(env!("OUT_DIR"), "/data.rs"));
8
9#[derive(Clone)]
11pub struct Context {
12 forest: BTreeMap<&'static str, Tree>,
13}
14
/// Coverage of a single script by a set of code points.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that results can be printed
/// and compared directly (for example in assertions).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Script {
    /// Number of supplied code points that fall inside the script's ranges.
    pub count: usize,
    /// Total number of code points covered by the script's ranges.
    pub total: usize,
}
23
24#[derive(Clone)]
25struct Tree {
26 count: usize,
27 backend: IntervalTree<u32, ()>,
28}
29
30impl Context {
31 pub fn detect<T>(&self, codepoints: T) -> BTreeMap<&'static str, Script>
33 where
34 T: IntoIterator<Item = [u32; 2]>,
35 {
36 let mut scripts: BTreeMap<&'static str, Script> = Default::default();
37 for [start, end] in codepoints {
38 for (name, tree) in self.forest.iter() {
39 for (range, _) in tree.backend.iter_overlaps(&(start..end + 1)) {
40 let count = range.end.min(end + 1) - range.start.max(start);
41 let script = scripts.entry(*name).or_insert_with(|| Script {
42 count: 0,
43 total: tree.count,
44 });
45 script.count += count as usize;
46 }
47 }
48 }
49 scripts
50 }
51}
52
53impl Default for Context {
54 fn default() -> Self {
55 let mut forest = BTreeMap::default();
56 for (name, ranges) in DATA {
57 forest.insert(*name, Tree::from(*ranges));
58 }
59 Self { forest }
60 }
61}
62
63impl From<&[(u32, u32)]> for Tree {
64 fn from(ranges: &[(u32, u32)]) -> Self {
65 let mut count = 0;
66 let mut backend = IntervalTree::default();
67 for (start, end) in ranges.iter().cloned() {
68 count += (end - start + 1) as usize;
69 backend.insert(start..end + 1, ());
70 }
71 Self { count, backend }
72 }
73}
74
#[cfg(test)]
mod tests {
    use serde::Deserialize;

    use crate::Context;

    // Unwraps a result, panicking on failure.
    macro_rules! ok(($result:expr) => ($result.unwrap()));

    /// One entry of a JSON fixture: either a single code point or an
    /// inclusive `[start, end]` pair.
    #[derive(Deserialize)]
    #[serde(untagged)]
    enum Range {
        Single(u32),
        Double(u32, u32),
    }

    impl From<Range> for [u32; 2] {
        /// A single code point becomes a pair whose two ends are equal.
        fn from(range: Range) -> Self {
            let (start, end) = match range {
                Range::Single(value) => (value, value),
                Range::Double(start, end) => (start, end),
            };
            [start, end]
        }
    }

    // Each test below feeds one code-point fixture through `compare`; the
    // expected entries are `(script name, count, total)`.

    #[test]
    fn proxima_nova() {
        const CODEPOINTS: &str = "[0, 13, [32, 126], [160, 383], 399, 402, 413, [416, 417], [431, 432], [486, 487], [490, 491], [506, 511], [536, 539], [562, 563], 567, 601, 626, 688, [699, 700], [710, 711], 713, 715, [728, 733], [768, 772], [774, 780], 783, 785, 803, [806, 808], 814, 817, [884, 885], 894, [900, 906], 908, [910, 929], [931, 974], 983, [1024, 1119], [1138, 1141], [1162, 1279], [1296, 1299], [1308, 1309], [1316, 1319], [1326, 1327], [7682, 7683], [7690, 7691], [7710, 7711], [7714, 7715], [7766, 7767], [7776, 7777], [7786, 7787], [7808, 7813], 7838, [7840, 7929], [8192, 8202], [8211, 8213], [8216, 8222], [8224, 8226], 8230, 8240, [8242, 8243], [8249, 8250], [8253, 8254], 8260, 8304, [8308, 8313], [8317, 8329], [8333, 8334], 8353, [8355, 8356], [8358, 8364], 8372, [8376, 8378], [8380, 8381], 8467, [8470, 8471], 8480, 8482, 8486, 8494, [8531, 8532], [8539, 8542], [8592, 8595], 8706, 8710, 8719, [8721, 8722], 8725, [8729, 8730], 8734, 8747, 8776, 8800, [8804, 8805], [8882, 8883], 8901, 8984, 9632, 9650, 9654, 9660, 9664, 9674, 9679, 9733, [9744, 9745], 9830, 10003, 11800, [57348, 57349], [62522, 62523], [62560, 62579], [62616, 62623], [62662, 62663], [62668, 62669], [62674, 62679], [62730, 62731], [62734, 62771], [62774, 62777], [62780, 62783], 63031, 63171, 63197, [63199, 63219], 63743, [64256, 64260]]";

        compare(
            CODEPOINTS,
            vec![
                ("Cyrillic", 230, 518),
                ("Emoji", 27, 1589),
                ("Emoji_Component", 15, 156),
                ("Greek", 78, 554),
                ("Inherited", 20, 714),
                ("Latin", 395, 1528),
                ("PUA", 115, 137468),
                ("Unknown", 123, 954979),
            ],
        );
    }

    #[test]
    fn proxima_nova_arabic() {
        const CODEPOINTS: &str = "[0, 13, [32, 126], [160, 383], 399, 402, 413, [416, 417], [431, 432], [486, 487], [490, 491], [506, 511], [536, 539], [562, 563], 567, 601, 626, [699, 703], [710, 711], 713, 715, [728, 733], [768, 772], [774, 780], 783, 785, 803, [806, 808], 814, 817, 894, 1548, 1563, 1567, [1569, 1594], [1600, 1622], [1632, 1645], [1648, 1649], 1662, 1700, 1722, [7682, 7683], [7690, 7695], [7710, 7717], [7722, 7723], [7732, 7733], [7766, 7767], [7776, 7779], [7786, 7791], [7808, 7813], [7826, 7827], 7830, 7838, [7840, 7929], [8192, 8202], [8204, 8205], [8211, 8213], [8216, 8222], [8224, 8226], 8230, 8240, [8242, 8243], [8249, 8250], [8253, 8254], 8260, 8304, [8308, 8313], [8320, 8329], 8353, [8355, 8356], [8358, 8364], 8372, [8376, 8378], [8380, 8381], 8467, [8470, 8471], 8480, 8482, 8486, 8494, 8706, 8710, 8719, [8721, 8722], 8725, [8729, 8730], 8734, 8747, 8776, 8800, [8804, 8805], 9674, 9676, 11800, [57348, 57349], 63171, 63197, 63743, [64256, 64260], 64336, 64362, 64414, 64508, [64830, 64831], 65020, 65153, 65155, 65157, 65159, 65165, 65167, 65171, 65173, 65177, 65181, 65185, 65189, 65193, 65195, 65197, 65199, 65201, 65205, 65209, 65213, 65217, 65221, 65225, 65229, 65233, 65237, 65241, 65245, 65249, 65253, 65261, 65263, 65265]";

        compare(
            CODEPOINTS,
            vec![
                ("Arabic", 100, 1469),
                ("Emoji", 21, 1589),
                ("Emoji_Component", 16, 156),
                ("Greek", 2, 554),
                ("Hebrew", 1, 143),
                ("Inherited", 36, 714),
                ("Latin", 414, 1528),
                ("PUA", 5, 137468),
                ("Unknown", 8, 954979),
            ],
        );
    }

    #[test]
    fn proxima_nova_devanagari() {
        const CODEPOINTS: &str = "[0, 13, [32, 126], [160, 383], 402, 413, [416, 417], [431, 432], [486, 487], [490, 491], [506, 511], [536, 539], [562, 563], 567, 626, [699, 700], [710, 711], 713, 715, [728, 733], [768, 772], [774, 780], 783, 785, 803, [806, 808], 814, 817, 894, [2304, 2431], [7682, 7683], [7690, 7691], [7710, 7711], [7714, 7715], [7766, 7767], [7776, 7777], [7786, 7787], [7808, 7813], 7838, [7840, 7929], [8192, 8202], [8204, 8205], [8211, 8213], [8216, 8222], [8224, 8226], 8230, 8240, [8242, 8243], [8249, 8250], [8253, 8254], 8260, 8304, [8308, 8313], [8320, 8329], 8353, [8355, 8356], [8358, 8364], 8372, [8376, 8378], [8380, 8381], 8471, 8482, 8486, 8706, 8710, 8719, [8721, 8722], 8725, [8729, 8730], 8734, 8747, 8776, 8800, [8804, 8805], 9674, 9676, 11800, 43259, [57348, 57349], 63171, 63197, [64256, 64260]]";

        compare(
            CODEPOINTS,
            vec![
                ("Arabic", 1, 1469),
                ("Devanagari", 125, 169),
                ("Emoji", 21, 1589),
                ("Emoji_Component", 16, 156),
                ("Greek", 2, 554),
                ("Inherited", 27, 714),
                ("Latin", 391, 1528),
                ("PUA", 4, 137468),
                ("Unknown", 7, 954979),
            ],
        );
    }

    #[test]
    fn proxima_nova_hangeul() {
        const CODEPOINTS: &str = "[0, 13, [32, 126], [160, 383], 402, 413, [416, 417], [431, 432], [486, 487], [490, 491], [506, 511], [536, 539], [562, 563], 567, 626, [699, 700], [710, 711], 713, 715, [728, 733], [768, 772], [774, 780], 783, 785, 803, [806, 808], 814, 817, 894, [7682, 7683], [7690, 7691], [7710, 7711], [7714, 7715], [7766, 7767], [7776, 7777], [7786, 7787], [7808, 7813], 7838, [7840, 7929], [8192, 8202], [8204, 8205], [8211, 8213], [8216, 8222], [8224, 8226], 8230, 8240, [8242, 8243], [8249, 8250], [8253, 8254], 8260, 8304, [8308, 8313], [8320, 8329], 8353, [8355, 8356], [8358, 8364], 8372, [8376, 8378], [8380, 8381], 8471, 8482, 8486, 8706, 8710, 8719, [8721, 8722], 8725, [8729, 8730], 8734, 8747, 8776, 8800, [8804, 8805], [9001, 9002], [9312, 9321], [9332, 9341], 9450, 9471, 9674, [10102, 10111], 11800, [12289, 12290], [12298, 12305], [12308, 12315], [12593, 12643], [12800, 12828], [12896, 12923], 12927, [44032, 55203], [57348, 57349], 63171, 63197, [64256, 64260], [65040, 65049], [65073, 65074], [65081, 65092]]";

        compare(
            CODEPOINTS,
            vec![
                ("Emoji", 22, 1589),
                ("Emoji_Component", 17, 156),
                ("Greek", 2, 554),
                ("Hangul", 11281, 11753),
                ("Inherited", 23, 714),
                ("Katakana", 1, 335),
                ("Latin", 391, 1528),
                ("PUA", 4, 137468),
                ("Unknown", 10, 954979),
            ],
        );
    }

    #[test]
    fn proxima_nova_hebrew() {
        const CODEPOINTS: &str = "[0, 13, [32, 126], [160, 383], 402, 413, [416, 417], [431, 432], [486, 487], [490, 491], [506, 511], [536, 539], [562, 563], 567, 626, [699, 700], [710, 711], 713, 715, [728, 733], [768, 772], [774, 780], 783, 785, 803, [806, 808], 814, 817, 894, [1456, 1468], [1470, 1476], 1479, [1488, 1514], [1520, 1524], [7682, 7683], [7690, 7691], [7710, 7711], [7714, 7717], [7730, 7731], [7766, 7767], [7776, 7777], [7786, 7789], [7806, 7813], [7826, 7827], 7838, [7840, 7929], [8192, 8202], [8204, 8207], [8211, 8213], [8216, 8222], [8224, 8226], 8230, 8240, [8242, 8243], [8249, 8250], [8253, 8254], 8260, 8304, [8308, 8313], [8320, 8329], 8353, [8355, 8356], [8358, 8364], 8372, [8376, 8378], [8380, 8381], 8471, 8482, 8486, 8706, 8710, 8719, [8721, 8722], 8725, [8729, 8730], 8734, 8747, 8776, 8800, [8804, 8805], 9674, 9676, 11800, [57348, 57349], 63171, 63197, [64256, 64260], 64285, 64287, [64297, 64310], [64312, 64316], 64318, [64320, 64321], [64323, 64324], [64326, 64334]]";

        compare(
            CODEPOINTS,
            vec![
                ("Emoji", 21, 1589),
                ("Emoji_Component", 17, 156),
                ("Greek", 2, 554),
                ("Hebrew", 88, 143),
                ("Inherited", 23, 714),
                ("Latin", 401, 1528),
                ("PUA", 4, 137468),
                ("Unknown", 14, 954979),
            ],
        );
    }

    #[test]
    fn proxima_nova_tamil() {
        const CODEPOINTS: &str = "[0, 13, [32, 126], [160, 383], 402, 413, [416, 417], [431, 432], [486, 487], [490, 491], [506, 511], [536, 539], [562, 563], 567, 626, [699, 700], [710, 711], 713, 715, [728, 733], [768, 772], [774, 780], 783, 785, 803, [806, 808], 814, 817, 894, [2404, 2405], [2946, 2947], [2949, 2954], [2958, 2960], [2962, 2965], [2969, 2970], 2972, [2974, 2975], [2979, 2980], [2984, 2986], [2990, 3001], [3006, 3010], [3014, 3016], [3018, 3021], 3024, 3031, [3046, 3066], [7682, 7683], [7690, 7691], [7710, 7711], [7714, 7715], [7732, 7735], [7738, 7739], [7748, 7753], [7766, 7767], [7774, 7779], [7786, 7789], [7808, 7813], 7838, [7840, 7929], [8192, 8202], [8204, 8205], [8211, 8213], [8216, 8222], [8224, 8226], 8230, 8240, [8242, 8243], [8249, 8250], [8253, 8254], 8260, 8304, [8308, 8313], [8320, 8329], 8353, [8355, 8356], [8358, 8364], 8372, [8376, 8378], [8380, 8381], 8471, 8482, 8486, 8706, 8710, 8719, [8721, 8722], 8725, [8729, 8730], 8734, 8747, 8776, 8800, [8804, 8805], 9674, 9676, 11800, [57348, 57349], 63171, 63197, [64256, 64260]]";

        compare(
            CODEPOINTS,
            vec![
                ("Devanagari", 1, 169),
                ("Emoji", 21, 1589),
                ("Emoji_Component", 16, 156),
                ("Greek", 2, 554),
                ("Inherited", 22, 714),
                ("Latin", 409, 1528),
                ("PUA", 4, 137468),
                ("Tamil", 72, 141),
                ("Unknown", 23, 954979),
            ],
        );
    }

    #[test]
    fn proxima_nova_thai() {
        const CODEPOINTS: &str = "[0, 13, [32, 126], [160, 383], 402, 413, [416, 417], [431, 432], [486, 487], [490, 491], [506, 511], [536, 539], [562, 563], 567, 626, [699, 700], [710, 711], 713, 715, [728, 733], [768, 772], [774, 780], 783, 785, 803, [806, 808], 814, 817, 894, [3585, 3642], [3647, 3675], [7682, 7683], [7690, 7691], [7710, 7711], [7714, 7715], [7766, 7767], [7776, 7777], [7786, 7787], [7808, 7813], 7838, [7840, 7929], [8192, 8202], [8204, 8205], [8211, 8213], [8216, 8222], [8224, 8226], 8230, 8240, [8242, 8243], [8249, 8250], [8253, 8254], 8260, 8304, [8308, 8313], [8320, 8329], 8353, [8355, 8356], [8358, 8364], 8372, [8376, 8378], [8380, 8381], 8471, 8482, 8486, 8706, 8710, 8719, [8721, 8722], 8725, [8729, 8730], 8734, 8747, 8776, 8800, [8804, 8805], 9674, 9676, 11800, [57348, 57349], 63171, 63197, [64256, 64260]]";

        compare(
            CODEPOINTS,
            vec![
                ("Emoji", 21, 1589),
                ("Emoji_Component", 16, 156),
                ("Greek", 2, 554),
                ("Inherited", 22, 714),
                ("Latin", 391, 1528),
                ("PUA", 4, 137468),
                ("Thai", 86, 88),
                ("Unknown", 9, 954979),
            ],
        );
    }

    /// Parses `codepoints` as a JSON list of `Range` entries, runs detection
    /// with the default context, and asserts that the resulting
    /// `(name, count, total)` triples equal `expected`.
    fn compare(codepoints: &str, expected: Vec<(&str, usize, usize)>) {
        let ranges: Vec<Range> = ok!(serde_json::from_str(codepoints));
        let context = Context::default();
        let actual: Vec<_> = context
            .detect(ranges.into_iter().map(<[u32; 2]>::from))
            .into_iter()
            .map(|(name, script)| (name, script.count, script.total))
            .collect();
        assert_eq!(actual, expected);
    }
}