/// Sorted table of inclusive `(start, end)` code-point ranges that
/// [`is_combining_mark`] treats as combining marks.
///
/// Properties visible in this file:
/// - Each entry has `start <= end`; a single code point is written `(cp, cp)`.
/// - Entries are strictly ascending and never overlap (`prev.end < next.start`);
///   the `ranges_sorted_non_overlapping` test enforces the ordering.
/// - Neighbouring entries are not always coalesced (e.g. `0x093A`, `0x093B`
///   and `0x093C` are three separate rows), so do not assume a gap between rows.
///
/// NOTE(review): the values look like the Unicode General Category `M`
/// (Mn/Mc/Me) ranges — confirm against the UCD release this table was derived
/// from before editing it by hand.
///
/// `#[rustfmt::skip]` keeps the one-range-per-line layout intact.
#[rustfmt::skip]
pub const COMBINING_MARK_RANGES: &[(u32, u32)] = &[
    (0x0300, 0x036F),
    (0x0483, 0x0489),
    (0x0591, 0x05BD),
    (0x05BF, 0x05BF),
    (0x05C1, 0x05C2),
    (0x05C4, 0x05C5),
    (0x05C7, 0x05C7),
    (0x0610, 0x061A),
    (0x064B, 0x065F),
    (0x0670, 0x0670),
    (0x06D6, 0x06DC),
    (0x06DF, 0x06E4),
    (0x06E7, 0x06E8),
    (0x06EA, 0x06ED),
    (0x0711, 0x0711),
    (0x0730, 0x074A),
    (0x07A6, 0x07B0),
    (0x07EB, 0x07F3),
    (0x07FD, 0x07FD),
    (0x0816, 0x0819),
    (0x081B, 0x0823),
    (0x0825, 0x0827),
    (0x0829, 0x082D),
    (0x0859, 0x085B),
    (0x08D3, 0x08E1),
    (0x08E3, 0x0902),
    (0x0903, 0x0903),
    (0x093A, 0x093A),
    (0x093B, 0x093B),
    (0x093C, 0x093C),
    (0x093E, 0x094F),
    (0x0951, 0x0957),
    (0x0962, 0x0963),
    (0x0981, 0x0983),
    (0x09BC, 0x09BC),
    (0x09BE, 0x09C4),
    (0x09C7, 0x09C8),
    (0x09CB, 0x09CD),
    (0x09D7, 0x09D7),
    (0x09E2, 0x09E3),
    (0x09FE, 0x09FE),
    (0x0A01, 0x0A03),
    (0x0A3C, 0x0A3C),
    (0x0A3E, 0x0A42),
    (0x0A47, 0x0A48),
    (0x0A4B, 0x0A4D),
    (0x0A51, 0x0A51),
    (0x0A70, 0x0A71),
    (0x0A75, 0x0A75),
    (0x0A81, 0x0A83),
    (0x0ABC, 0x0ABC),
    (0x0ABE, 0x0AC5),
    (0x0AC7, 0x0AC9),
    (0x0ACB, 0x0ACD),
    (0x0AE2, 0x0AE3),
    (0x0AFA, 0x0AFF),
    (0x0B01, 0x0B03),
    (0x0B3C, 0x0B3C),
    (0x0B3E, 0x0B44),
    (0x0B47, 0x0B48),
    (0x0B4B, 0x0B4D),
    (0x0B55, 0x0B57),
    (0x0B62, 0x0B63),
    (0x0B82, 0x0B82),
    (0x0BBE, 0x0BC2),
    (0x0BC6, 0x0BC8),
    (0x0BCA, 0x0BCD),
    (0x0BD7, 0x0BD7),
    (0x0C00, 0x0C04),
    (0x0C3C, 0x0C3C),
    (0x0C3E, 0x0C44),
    (0x0C46, 0x0C48),
    (0x0C4A, 0x0C4D),
    (0x0C55, 0x0C56),
    (0x0C62, 0x0C63),
    (0x0C81, 0x0C83),
    (0x0CBC, 0x0CBC),
    (0x0CBE, 0x0CC4),
    (0x0CC6, 0x0CC8),
    (0x0CCA, 0x0CCD),
    (0x0CD5, 0x0CD6),
    (0x0CE2, 0x0CE3),
    (0x0D00, 0x0D03),
    (0x0D3B, 0x0D3C),
    (0x0D3E, 0x0D44),
    (0x0D46, 0x0D48),
    (0x0D4A, 0x0D4D),
    (0x0D57, 0x0D57),
    (0x0D62, 0x0D63),
    (0x0D81, 0x0D83),
    (0x0DCA, 0x0DCA),
    (0x0DCF, 0x0DD4),
    (0x0DD6, 0x0DD6),
    (0x0DD8, 0x0DDF),
    (0x0DF2, 0x0DF3),
    (0x0E31, 0x0E31),
    (0x0E34, 0x0E3A),
    (0x0E47, 0x0E4E),
    (0x0EB1, 0x0EB1),
    (0x0EB4, 0x0EBC),
    (0x0EC8, 0x0ECD),
    (0x0F18, 0x0F19),
    (0x0F35, 0x0F35),
    (0x0F37, 0x0F37),
    (0x0F39, 0x0F39),
    (0x0F3E, 0x0F3F),
    (0x0F71, 0x0F84),
    (0x0F86, 0x0F87),
    (0x0F8D, 0x0F97),
    (0x0F99, 0x0FBC),
    (0x0FC6, 0x0FC6),
    (0x102B, 0x103E),
    (0x1056, 0x1059),
    (0x105E, 0x1060),
    (0x1062, 0x1064),
    (0x1067, 0x106D),
    (0x1071, 0x1074),
    (0x1082, 0x108D),
    (0x108F, 0x108F),
    (0x109A, 0x109D),
    (0x135D, 0x135F),
    (0x1712, 0x1715),
    (0x1732, 0x1734),
    (0x1752, 0x1753),
    (0x1772, 0x1773),
    (0x17B4, 0x17D3),
    (0x17DD, 0x17DD),
    (0x180B, 0x180D),
    (0x1885, 0x1886),
    (0x18A9, 0x18A9),
    (0x1920, 0x192B),
    (0x1930, 0x193B),
    (0x1A17, 0x1A1B),
    (0x1A55, 0x1A5E),
    (0x1A60, 0x1A7C),
    (0x1A7F, 0x1A7F),
    (0x1AB0, 0x1ACE),
    (0x1B00, 0x1B04),
    (0x1B34, 0x1B44),
    (0x1B6B, 0x1B73),
    (0x1B80, 0x1B82),
    (0x1BA1, 0x1BAD),
    (0x1BE6, 0x1BF3),
    (0x1C24, 0x1C37),
    (0x1CD0, 0x1CD2),
    (0x1CD4, 0x1CE8),
    (0x1CED, 0x1CED),
    (0x1CF4, 0x1CF4),
    (0x1CF7, 0x1CF9),
    (0x1DC0, 0x1DFF),
    (0x20D0, 0x20F0),
    (0x2CEF, 0x2CF1),
    (0x2D7F, 0x2D7F),
    (0x2DE0, 0x2DFF),
    (0x302A, 0x302F),
    (0x3099, 0x309A),
    (0xA66F, 0xA672),
    (0xA674, 0xA67D),
    (0xA69E, 0xA69F),
    (0xA6F0, 0xA6F1),
    (0xA802, 0xA802),
    (0xA806, 0xA806),
    (0xA80B, 0xA80B),
    (0xA823, 0xA827),
    (0xA82C, 0xA82C),
    (0xA880, 0xA881),
    (0xA8B4, 0xA8C5),
    (0xA8E0, 0xA8F1),
    (0xA8FF, 0xA8FF),
    (0xA926, 0xA92D),
    (0xA947, 0xA953),
    (0xA980, 0xA983),
    (0xA9B3, 0xA9C0),
    (0xA9E5, 0xA9E5),
    (0xAA29, 0xAA36),
    (0xAA43, 0xAA43),
    (0xAA4C, 0xAA4D),
    (0xAA7B, 0xAA7D),
    (0xAAB0, 0xAAB0),
    (0xAAB2, 0xAAB4),
    (0xAAB7, 0xAAB8),
    (0xAABE, 0xAABF),
    (0xAAC1, 0xAAC1),
    (0xAAEB, 0xAAEF),
    (0xAAF5, 0xAAF6),
    (0xABE3, 0xABEA),
    (0xABEC, 0xABED),
    (0xFB1E, 0xFB1E),
    (0xFE00, 0xFE0F),
    (0xFE20, 0xFE2F),
    // Every entry from here on is above 0xFFFF, i.e. needs more than 16 bits.
    (0x101FD, 0x101FD),
    (0x102E0, 0x102E0),
    (0x10376, 0x1037A),
    (0x10A01, 0x10A03),
    (0x10A05, 0x10A06),
    (0x10A0C, 0x10A0F),
    (0x10A38, 0x10A3A),
    (0x10A3F, 0x10A3F),
    (0x10AE5, 0x10AE6),
    (0x10D24, 0x10D27),
    (0x10EAB, 0x10EAC),
    (0x10F46, 0x10F50),
    (0x10F82, 0x10F85),
    (0x11000, 0x11002),
    (0x11038, 0x11046),
    (0x11070, 0x11070),
    (0x11073, 0x11074),
    (0x1107F, 0x11082),
    (0x110B0, 0x110BA),
    (0x110C2, 0x110C2),
    (0x11100, 0x11102),
    (0x11127, 0x11134),
    (0x11145, 0x11146),
    (0x11173, 0x11173),
    (0x11180, 0x11182),
    (0x111B3, 0x111C0),
    (0x111C9, 0x111CC),
    (0x111CE, 0x111CF),
    (0x1122C, 0x11237),
    (0x1123E, 0x1123E),
    (0x112DF, 0x112EA),
    (0x11300, 0x11303),
    (0x1133B, 0x1133C),
    (0x1133E, 0x11344),
    (0x11347, 0x11348),
    (0x1134B, 0x1134D),
    (0x11357, 0x11357),
    (0x11362, 0x11363),
    (0x11366, 0x1136C),
    (0x11370, 0x11374),
    (0x11435, 0x11446),
    (0x1145E, 0x1145E),
    (0x114B0, 0x114C3),
    (0x115AF, 0x115B5),
    (0x115B8, 0x115C0),
    (0x115DC, 0x115DD),
    (0x11630, 0x11640),
    (0x116AB, 0x116B7),
    (0x1171D, 0x1172B),
    (0x1182C, 0x1183A),
    (0x11930, 0x11935),
    (0x11937, 0x11938),
    (0x1193B, 0x1193E),
    (0x11940, 0x11940),
    (0x11942, 0x11943),
    (0x119D1, 0x119D7),
    (0x119DA, 0x119E0),
    (0x119E4, 0x119E4),
    (0x11A01, 0x11A0A),
    (0x11A33, 0x11A39),
    (0x11A3B, 0x11A3E),
    (0x11A47, 0x11A47),
    (0x11A51, 0x11A5B),
    (0x11A8A, 0x11A99),
    (0x11C2F, 0x11C36),
    (0x11C38, 0x11C3F),
    (0x11C92, 0x11CA7),
    (0x11CA9, 0x11CB6),
    (0x11D31, 0x11D36),
    (0x11D3A, 0x11D3A),
    (0x11D3C, 0x11D3D),
    (0x11D3F, 0x11D45),
    (0x11D47, 0x11D47),
    (0x11D8A, 0x11D8E),
    (0x11D90, 0x11D91),
    (0x11D93, 0x11D97),
    (0x11EF3, 0x11EF6),
    (0x16AF0, 0x16AF4),
    (0x16B30, 0x16B36),
    (0x16F4F, 0x16F4F),
    (0x16F51, 0x16F87),
    (0x16F8F, 0x16F92),
    (0x16FE4, 0x16FE4),
    (0x16FF0, 0x16FF1),
    (0x1BC9D, 0x1BC9E),
    (0x1CF00, 0x1CF2D),
    (0x1CF30, 0x1CF46),
    (0x1D165, 0x1D169),
    (0x1D16D, 0x1D172),
    (0x1D17B, 0x1D182),
    (0x1D185, 0x1D18B),
    (0x1D1AA, 0x1D1AD),
    (0x1D242, 0x1D244),
    (0x1DA00, 0x1DA36),
    (0x1DA3B, 0x1DA6C),
    (0x1DA75, 0x1DA75),
    (0x1DA84, 0x1DA84),
    (0x1DA9B, 0x1DA9F),
    (0x1DAA1, 0x1DAAF),
    (0x1E000, 0x1E006),
    (0x1E008, 0x1E018),
    (0x1E01B, 0x1E021),
    (0x1E023, 0x1E024),
    (0x1E026, 0x1E02A),
    (0x1E130, 0x1E136),
    (0x1E2AE, 0x1E2AE),
    (0x1E2EC, 0x1E2EF),
    (0x1E8D0, 0x1E8D6),
    (0x1E944, 0x1E94A),
    (0xE0100, 0xE01EF),
];
396
/// Returns the static combining-mark range table, [`COMBINING_MARK_RANGES`].
///
/// The slice is handed back as-is: inclusive `(start, end)` pairs in ascending
/// order. No copy or allocation takes place.
pub fn combining_mark_ranges() -> &'static [(u32, u32)] {
    COMBINING_MARK_RANGES
}
403
/// Serializes a `(start, end)` range table into bytes.
///
/// This is a thin wrapper: all work is done by `super::encode_u32_pair_table`.
/// The `encode_ranges_layout` test in this file pins the layout for a
/// two-entry table: a 4-byte little-endian entry count followed by each pair
/// as two little-endian `u32`s (start, then end), 20 bytes in total.
///
/// NOTE(review): behaviour for other inputs (e.g. an empty table) is defined
/// by the parent-module helper, which is not visible here — confirm there.
pub fn encode_ranges_bytes(table: &[(u32, u32)]) -> Vec<u8> {
    super::encode_u32_pair_table(table)
}
409
/// Returns the size reported by `super::encoded_u32_pair_table_size` for a
/// table with `table.len()` entries.
///
/// Only the entry count is forwarded; the range values themselves are not
/// inspected.
///
/// NOTE(review): presumably this is the byte length of what
/// [`encode_ranges_bytes`] would produce for the same table — verify against
/// the parent-module helper.
pub fn encoded_ranges_size(table: &[(u32, u32)]) -> usize {
    super::encoded_u32_pair_table_size(table.len())
}
414
/// Reports whether code point `cp` falls inside any range of
/// [`COMBINING_MARK_RANGES`].
///
/// The lookup itself is delegated to `super::cp_in_ranges`. Range end points
/// are inclusive: the tests in this file expect `0xFE0F`, the upper bound of
/// `(0xFE00, 0xFE0F)`, to be reported as a mark.
///
/// NOTE(review): the search strategy (and whether it relies on the table being
/// sorted) lives in the parent-module helper — confirm there before changing
/// the table's ordering.
pub fn is_combining_mark(cp: u32) -> bool {
    super::cp_in_ranges(cp, COMBINING_MARK_RANGES)
}
424
#[cfg(test)]
mod tests {
    use super::*;

    /// Structural invariants of `COMBINING_MARK_RANGES`: every entry is a
    /// well-formed inclusive range, and entries are strictly ascending without
    /// overlap.
    #[test]
    fn ranges_sorted_non_overlapping() {
        let table = COMBINING_MARK_RANGES;

        // Validate each entry on its own. A pairwise walk alone only ever
        // inspects the *second* element of each window, which would leave the
        // first entry (and a one-entry table) unchecked.
        for &(start, end) in table {
            assert!(
                start <= end,
                "range start must be <= end; got {start:#x}..={end:#x}"
            );
        }

        // Then check ordering between neighbours.
        for win in table.windows(2) {
            let (_, prev_end) = win[0];
            let (next_start, _) = win[1];
            assert!(
                prev_end < next_start,
                "combining-mark ranges must be sorted + non-overlapping; \
                 prev_end={prev_end:#x} >= next_start={next_start:#x}"
            );
        }
    }

    /// Spot checks against known table rows.
    #[test]
    fn common_combining_marks_present() {
        // All three fall inside the first row, 0x0300..=0x036F.
        assert!(is_combining_mark(0x0301));
        assert!(is_combining_mark(0x0302));
        assert!(is_combining_mark(0x0308));
        // Upper bound of 0xFE00..=0xFE0F: exercises the inclusive end point.
        assert!(is_combining_mark(0xFE0F));
        // 0x200D is not covered by any row and must be rejected.
        assert!(!is_combining_mark(0x200D));
    }

    /// Nothing in the range 0x20..0x7F may be classified as a mark.
    #[test]
    fn ascii_letters_not_marks() {
        for cp in 0x20u32..0x7F {
            assert!(
                !is_combining_mark(cp),
                "ascii cp {cp:#x} must not be detected as a Mark"
            );
        }
    }

    /// Pins the serialized layout: a 4-byte little-endian entry count, then
    /// each pair as two little-endian `u32`s (start, end).
    #[test]
    fn encode_ranges_layout() {
        let toy: &[(u32, u32)] = &[(0x300, 0x36F), (0x483, 0x489)];
        let bytes = encode_ranges_bytes(toy);
        // 4-byte header + 2 entries * 8 bytes each.
        assert_eq!(bytes.len(), 4 + 16);
        assert_eq!(&bytes[0..4], &2u32.to_le_bytes());
        assert_eq!(&bytes[4..8], &0x300u32.to_le_bytes());
        assert_eq!(&bytes[8..12], &0x36Fu32.to_le_bytes());
        assert_eq!(&bytes[12..16], &0x483u32.to_le_bytes());
        assert_eq!(&bytes[16..20], &0x489u32.to_le_bytes());
    }
}