unicode_shaper/ubidi/
internal.rs

/// Mirror-pair lookup table used when right-to-left text is reversed.
///
/// If an RTL set is reversed but surrounded by (), [], {}, or <>, then the surrounding
/// characters must be mirrored too. This table also tracks other special characters that
/// need to be mirrored. Some examples:
/// '«', '»', '∕', '∟', '∠', '∡', '∢', '∤', '≃', '≅', '≌', '⊘', '⊦', '⊨',
///
/// Each entry is `[from, to]`: a `u16` character value and the value that replaces it
/// (see [`mirror_adjust_string`]). Every pair is listed in both directions, and entries
/// are ordered by ascending `from` value.
pub static MIRROR_CHAR: [[u16; 2]; 48] = [
    [40, 41],
    [41, 40],
    [60, 62],
    [62, 60],
    [91, 93],
    [93, 91],
    [123, 125],
    [125, 123],
    [171, 187],
    [187, 171],
    [8725, 10741],
    [8735, 11262],
    [8736, 10659],
    [8737, 10651],
    [8738, 10656],
    [8740, 10990],
    [8771, 8909],
    [8773, 8780],
    [8780, 8773],
    [8856, 10680],
    [8870, 10974],
    [8872, 10980],
    [8873, 10979],
    [8875, 10981],
    [8888, 10204],
    [8909, 8771],
    [8946, 8954],
    [8947, 8955],
    [8948, 8956],
    [8950, 8957],
    [8951, 8958],
    [8954, 8946],
    [8955, 8947],
    [8956, 8948],
    [8957, 8950],
    [8958, 8951],
    [10204, 8888],
    [10651, 8737],
    [10656, 8738],
    [10659, 8736],
    [10680, 8856],
    [10741, 8725],
    [10974, 8870],
    [10979, 8873],
    [10980, 8872],
    [10981, 8875],
    [10990, 8740],
    [11262, 8735],
];
// 192 bytes (48 entries × 2 × `u16`)

56/// Adjust string characters that have mirrored characters. Examples:
57/// '«', '»', '∕', '∟', '∠', '∡', '∢', '∤', '≃', '≅', '≌', '⊘', '⊦', '⊨',
58pub fn mirror_adjust_string(s: &mut [u16]) {
59    for c in s {
60        for from_to in MIRROR_CHAR {
61            if *c == from_to[0] {
62                *c = from_to[1];
63                break;
64            }
65        }
66    }
67}
68
/// A RandALCat character is a character with unambiguously right-to-left directionality.
///
/// Each entry is an inclusive `[start, end]` range of `u16` character values (a single
/// character is stored as `[x, x]`). Ranges are listed in ascending order and do not
/// overlap. Queried by [`is_rtl`].
pub static RAND_AL_CAT: [[u16; 2]; 45] = [
    [0x05BE, 0x05BE],
    [0x05C0, 0x05C0],
    [0x05C3, 0x05C3],
    [0x05C6, 0x05C6],
    [0x05D0, 0x05EA],
    [0x05F0, 0x05F4],
    [0x0608, 0x0608],
    [0x060B, 0x060B],
    [0x060D, 0x060D],
    [0x061B, 0x061B],
    [0x061E, 0x064A],
    [0x066D, 0x066F],
    [0x0671, 0x06D5],
    [0x06E5, 0x06E6],
    [0x06EE, 0x06EF],
    [0x06FA, 0x070D],
    [0x0710, 0x0710],
    [0x0712, 0x072F],
    [0x074D, 0x07A5],
    [0x07B1, 0x07B1],
    [0x07C0, 0x07EA],
    [0x07F4, 0x07F5],
    [0x07FA, 0x07FA],
    [0x0800, 0x0815],
    [0x081A, 0x081A],
    [0x0824, 0x0824],
    [0x0828, 0x0828],
    [0x0830, 0x083E],
    [0x0840, 0x0858],
    [0x085E, 0x085E],
    [0x200F, 0x200F],
    [0xFB1D, 0xFB1D],
    [0xFB1F, 0xFB28],
    [0xFB2A, 0xFB36],
    [0xFB38, 0xFB3C],
    [0xFB3E, 0xFB3E],
    [0xFB40, 0xFB41],
    [0xFB43, 0xFB44],
    [0xFB46, 0xFBC1],
    [0xFBD3, 0xFD3D],
    [0xFD50, 0xFD8F],
    [0xFD92, 0xFDC7],
    [0xFDF0, 0xFDFC],
    [0xFE70, 0xFE74],
    [0xFE76, 0xFEFC],
];
// 180 bytes (45 entries × 2 × `u16`)

119/// Check if a character is RandALCat (Right-to-left reading characters)
120pub fn is_rtl(c: &u16) -> bool {
121    for arr in RAND_AL_CAT {
122        if *c >= arr[0] && *c <= arr[1] {
123            return true;
124        }
125    }
126    false
127}
128
/// NEUTRAL characters are those with no inherent directionality, which can be
/// treated as being part of any adjacent runs of text with other directionality.
///
/// Each entry is an inclusive `[start, end]` range of `u16` character values (a single
/// character is stored as `[x, x]`). Ranges are listed in ascending order and do not
/// overlap. Queried by [`is_neutral`].
pub static NEUTRAL: [[u16; 2]; 137] = [
    [0x0009, 0x000D],
    [0x001C, 0x0022],
    [0x0026, 0x002A],
    [0x003B, 0x0040],
    [0x005B, 0x0060],
    [0x007B, 0x007E],
    [0x0085, 0x0085],
    [0x00A1, 0x00A1],
    [0x00A6, 0x00A9],
    [0x00AB, 0x00AC],
    [0x00AE, 0x00AF],
    [0x00B4, 0x00B4],
    [0x00B6, 0x00B8],
    [0x00BB, 0x00BF],
    [0x00D7, 0x00D7],
    [0x00F7, 0x00F7],
    [0x02B9, 0x02BA],
    [0x02C2, 0x02CF],
    [0x02D2, 0x02DF],
    [0x02E5, 0x02ED],
    [0x02EF, 0x02FF],
    [0x0374, 0x0375],
    [0x037E, 0x037E],
    [0x0384, 0x0385],
    [0x0387, 0x0387],
    [0x03F6, 0x03F6],
    [0x058A, 0x058A],
    [0x0606, 0x0607],
    [0x060E, 0x060F],
    [0x06DE, 0x06DE],
    [0x06E9, 0x06E9],
    [0x07F6, 0x07F9],
    [0x0BF3, 0x0BF8],
    [0x0BFA, 0x0BFA],
    [0x0C78, 0x0C7E],
    [0x0F3A, 0x0F3D],
    [0x1390, 0x1399],
    [0x1400, 0x1400],
    [0x1680, 0x1680],
    [0x169B, 0x169C],
    [0x17F0, 0x17F9],
    [0x1800, 0x180A],
    [0x180E, 0x180E],
    [0x1940, 0x1940],
    [0x1944, 0x1945],
    [0x19DE, 0x19FF],
    [0x1FBD, 0x1FBD],
    [0x1FBF, 0x1FC1],
    [0x1FCD, 0x1FCF],
    [0x1FDD, 0x1FDF],
    [0x1FED, 0x1FEF],
    [0x1FFD, 0x1FFE],
    [0x2000, 0x200A],
    [0x2010, 0x202E],
    [0x2035, 0x2043],
    [0x2045, 0x205F],
    [0x207C, 0x207E],
    [0x208C, 0x208E],
    [0x2100, 0x2101],
    [0x2103, 0x2106],
    [0x2108, 0x2109],
    [0x2114, 0x2114],
    [0x2116, 0x2118],
    [0x211E, 0x2123],
    [0x2125, 0x2125],
    [0x2127, 0x2127],
    [0x2129, 0x2129],
    [0x213A, 0x213B],
    [0x2140, 0x2144],
    [0x214A, 0x214D],
    [0x2150, 0x215F],
    [0x2189, 0x2189],
    [0x2190, 0x2211],
    [0x2214, 0x2335],
    [0x237B, 0x2394],
    [0x2396, 0x23F3],
    [0x2400, 0x2426],
    [0x2440, 0x244A],
    [0x2460, 0x2487],
    [0x24EA, 0x26AB],
    [0x26AD, 0x26FF],
    [0x2701, 0x27CA],
    [0x27CC, 0x27CC],
    [0x27CE, 0x27FF],
    [0x2900, 0x2B4C],
    [0x2B50, 0x2B59],
    [0x2CE5, 0x2CEA],
    [0x2CF9, 0x2CFF],
    [0x2E00, 0x2E31],
    [0x2E80, 0x2E99],
    [0x2E9B, 0x2EF3],
    [0x2F00, 0x2FD5],
    [0x2FF0, 0x2FFB],
    [0x3000, 0x3004],
    [0x3008, 0x3020],
    [0x3030, 0x3030],
    [0x3036, 0x3037],
    [0x303D, 0x303F],
    [0x309B, 0x309C],
    [0x30A0, 0x30A0],
    [0x30FB, 0x30FB],
    [0x31C0, 0x31E3],
    [0x321D, 0x321E],
    [0x3250, 0x325F],
    [0x327C, 0x327E],
    [0x32B1, 0x32BF],
    [0x32CC, 0x32CF],
    [0x3377, 0x337A],
    [0x33DE, 0x33DF],
    [0x33FF, 0x33FF],
    [0x4DC0, 0x4DFF],
    [0xA490, 0xA4C6],
    [0xA60D, 0xA60F],
    [0xA673, 0xA673],
    [0xA67E, 0xA67F],
    [0xA700, 0xA721],
    [0xA788, 0xA788],
    [0xA828, 0xA82B],
    [0xA874, 0xA877],
    [0xFD3E, 0xFD3F],
    [0xFDFD, 0xFDFD],
    [0xFE10, 0xFE19],
    [0xFE30, 0xFE4F],
    [0xFE51, 0xFE51],
    [0xFE54, 0xFE54],
    [0xFE56, 0xFE5E],
    [0xFE60, 0xFE61],
    [0xFE64, 0xFE66],
    [0xFE68, 0xFE68],
    [0xFE6B, 0xFE6B],
    [0xFF01, 0xFF02],
    [0xFF06, 0xFF0A],
    [0xFF1B, 0xFF20],
    [0xFF3B, 0xFF40],
    [0xFF5B, 0xFF65],
    [0xFFE2, 0xFFE4],
    [0xFFE8, 0xFFEE],
];
// 548 bytes (137 entries × 2 × `u16`)

272/// Check if a character is NeutralCat (Neutral characters)
273pub fn is_neutral(c: &u16) -> bool {
274    for arr in NEUTRAL {
275        if *c >= arr[0] && *c <= arr[1] {
276            return true;
277        }
278    }
279    false
280}
281
/// List of WeakCat (Weak characters)
///
/// Each entry is an inclusive `[start, end]` range of `u16` character values (a single
/// character is stored as `[x, x]`). Ranges are listed in ascending order. Queried by
/// [`is_weak`].
pub static WEAK: [[u16; 2]; 228] = [
    [0x0000, 0x0008],
    [0x000E, 0x001B],
    [0x0023, 0x0025],
    [0x002B, 0x003A],
    [0x007F, 0x0084],
    [0x0086, 0x00A0],
    [0x00A2, 0x00A5],
    [0x00AD, 0x00AD],
    [0x00B0, 0x00B3],
    [0x00B9, 0x00B9],
    [0x0300, 0x036F],
    [0x0483, 0x0489],
    [0x0591, 0x05BD],
    [0x05BF, 0x05BF],
    [0x05C1, 0x05C2],
    [0x05C4, 0x05C5],
    [0x05C7, 0x05C7],
    [0x0600, 0x0603],
    [0x0609, 0x060A],
    [0x060C, 0x060C],
    [0x0610, 0x061A],
    [0x064B, 0x066C],
    [0x0670, 0x0670],
    [0x06D6, 0x06DD],
    [0x06DF, 0x06E4],
    [0x06E7, 0x06E8],
    [0x06EA, 0x06ED],
    [0x06F0, 0x06F9],
    [0x070F, 0x070F],
    [0x0711, 0x0711],
    [0x0730, 0x074A],
    [0x07A6, 0x07B0],
    [0x07EB, 0x07F3],
    [0x0816, 0x0819],
    [0x081B, 0x0823],
    [0x0825, 0x0827],
    [0x0829, 0x082D],
    [0x0859, 0x085B],
    [0x0900, 0x0902],
    [0x093A, 0x093A],
    [0x093C, 0x093C],
    [0x0941, 0x0948],
    [0x094D, 0x094D],
    [0x0951, 0x0957],
    [0x0962, 0x0963],
    [0x0981, 0x0981],
    [0x09BC, 0x09BC],
    [0x09C1, 0x09C4],
    [0x09CD, 0x09CD],
    [0x09E2, 0x09E3],
    [0x09F2, 0x09F3],
    [0x09FB, 0x09FB],
    [0x0A01, 0x0A02],
    [0x0A3C, 0x0A3C],
    [0x0A41, 0x0A42],
    [0x0A47, 0x0A48],
    [0x0A4B, 0x0A4D],
    [0x0A51, 0x0A51],
    [0x0A70, 0x0A71],
    [0x0A75, 0x0A75],
    [0x0A81, 0x0A82],
    [0x0ABC, 0x0ABC],
    [0x0AC1, 0x0AC5],
    [0x0AC7, 0x0AC8],
    [0x0ACD, 0x0ACD],
    [0x0AE2, 0x0AE3],
    [0x0AF1, 0x0AF1],
    [0x0B01, 0x0B01],
    [0x0B3C, 0x0B3C],
    [0x0B3F, 0x0B3F],
    [0x0B41, 0x0B44],
    [0x0B4D, 0x0B4D],
    [0x0B56, 0x0B56],
    [0x0B62, 0x0B63],
    [0x0B82, 0x0B82],
    [0x0BC0, 0x0BC0],
    [0x0BCD, 0x0BCD],
    [0x0BF9, 0x0BF9],
    [0x0C3E, 0x0C40],
    [0x0C46, 0x0C48],
    [0x0C4A, 0x0C4D],
    [0x0C55, 0x0C56],
    [0x0C62, 0x0C63],
    [0x0CBC, 0x0CBC],
    [0x0CCC, 0x0CCD],
    [0x0CE2, 0x0CE3],
    [0x0D41, 0x0D44],
    [0x0D4D, 0x0D4D],
    [0x0D62, 0x0D63],
    [0x0DCA, 0x0DCA],
    [0x0DD2, 0x0DD4],
    [0x0DD6, 0x0DD6],
    [0x0E31, 0x0E31],
    [0x0E34, 0x0E3A],
    [0x0E3F, 0x0E3F],
    [0x0E47, 0x0E4E],
    [0x0EB1, 0x0EB1],
    [0x0EB4, 0x0EB9],
    [0x0EBB, 0x0EBC],
    [0x0EC8, 0x0ECD],
    [0x0F18, 0x0F19],
    [0x0F35, 0x0F35],
    [0x0F37, 0x0F37],
    [0x0F39, 0x0F39],
    [0x0F71, 0x0F7E],
    [0x0F80, 0x0F84],
    [0x0F86, 0x0F87],
    [0x0F8D, 0x0F97],
    [0x0F99, 0x0FBC],
    [0x0FC6, 0x0FC6],
    [0x102D, 0x1030],
    [0x1032, 0x1037],
    [0x1039, 0x103A],
    [0x103D, 0x103E],
    [0x1058, 0x1059],
    [0x105E, 0x1060],
    [0x1071, 0x1074],
    [0x1082, 0x1082],
    [0x1085, 0x1086],
    [0x108D, 0x108D],
    [0x109D, 0x109D],
    [0x135D, 0x135F],
    [0x1712, 0x1714],
    [0x1732, 0x1734],
    [0x1752, 0x1753],
    [0x1772, 0x1773],
    [0x17B7, 0x17BD],
    [0x17C6, 0x17C6],
    [0x17C9, 0x17D3],
    [0x17DB, 0x17DB],
    [0x17DD, 0x17DD],
    [0x180B, 0x180D],
    [0x18A9, 0x18A9],
    [0x1920, 0x1922],
    [0x1927, 0x1928],
    [0x1932, 0x1932],
    [0x1939, 0x193B],
    [0x1A17, 0x1A18],
    [0x1A56, 0x1A56],
    [0x1A58, 0x1A5E],
    [0x1A60, 0x1A60],
    [0x1A62, 0x1A62],
    [0x1A65, 0x1A6C],
    [0x1A73, 0x1A7C],
    [0x1A7F, 0x1A7F],
    [0x1B00, 0x1B03],
    [0x1B34, 0x1B34],
    [0x1B36, 0x1B3A],
    [0x1B3C, 0x1B3C],
    [0x1B42, 0x1B42],
    [0x1B6B, 0x1B73],
    [0x1B80, 0x1B81],
    [0x1BA2, 0x1BA5],
    [0x1BA8, 0x1BA9],
    [0x1BE6, 0x1BE6],
    [0x1BE8, 0x1BE9],
    [0x1BED, 0x1BED],
    [0x1BEF, 0x1BF1],
    [0x1C2C, 0x1C33],
    [0x1C36, 0x1C37],
    [0x1CD0, 0x1CD2],
    [0x1CD4, 0x1CE0],
    [0x1CE2, 0x1CE8],
    [0x1CED, 0x1CED],
    [0x1DC0, 0x1DE6],
    [0x1DFC, 0x1DFF],
    [0x200B, 0x200D],
    [0x202F, 0x2034],
    [0x2044, 0x2044],
    [0x2060, 0x2064],
    [0x206A, 0x2070],
    [0x2074, 0x207B],
    [0x2080, 0x208B],
    [0x20A0, 0x20B9],
    [0x20D0, 0x20F0],
    [0x212E, 0x212E],
    [0x2212, 0x2213],
    [0x2488, 0x249B],
    [0x2CEF, 0x2CF1],
    [0x2D7F, 0x2D7F],
    [0x2DE0, 0x2DFF],
    [0x302A, 0x302F],
    [0x3099, 0x309A],
    [0xA66F, 0xA672],
    [0xA67C, 0xA67D],
    [0xA6F0, 0xA6F1],
    [0xA802, 0xA802],
    [0xA806, 0xA806],
    [0xA80B, 0xA80B],
    [0xA825, 0xA826],
    [0xA838, 0xA839],
    [0xA8C4, 0xA8C4],
    [0xA8E0, 0xA8F1],
    [0xA926, 0xA92D],
    [0xA947, 0xA951],
    [0xA980, 0xA982],
    [0xA9B3, 0xA9B3],
    [0xA9B6, 0xA9B9],
    [0xA9BC, 0xA9BC],
    // NOTE(review): exact duplicate of the previous entry. It is harmless for lookups and
    // is kept so the declared array length (228) — part of the public type — is unchanged.
    [0xA9BC, 0xA9BC],
    [0xAA29, 0xAA2E],
    [0xAA31, 0xAA32],
    [0xAA35, 0xAA36],
    [0xAA43, 0xAA43],
    [0xAA4C, 0xAA4C],
    [0xAAB0, 0xAAB0],
    [0xAAB2, 0xAAB4],
    [0xAAB7, 0xAAB8],
    [0xAABE, 0xAABF],
    [0xAAC1, 0xAAC1],
    [0xABE5, 0xABE5],
    [0xABE8, 0xABE8],
    [0xABED, 0xABED],
    [0xFB1E, 0xFB1E],
    [0xFB29, 0xFB29],
    [0xFE00, 0xFE0F],
    [0xFE20, 0xFE26],
    [0xFE50, 0xFE50],
    [0xFE52, 0xFE52],
    [0xFE55, 0xFE55],
    [0xFE5F, 0xFE5F],
    [0xFE62, 0xFE63],
    [0xFE69, 0xFE6A],
    [0xFEFF, 0xFEFF],
    [0xFF03, 0xFF05],
    [0xFF0B, 0xFF1A],
    [0xFFE0, 0xFFE1],
    [0xFFE5, 0xFFE6],
];
// 912 bytes (228 entries × 2 × `u16`)

515/// Check if a character is WeakCat (Weak characters)
516pub fn is_weak(c: &u16) -> bool {
517    for arr in WEAK {
518        if *c >= arr[0] && *c <= arr[1] {
519            return true;
520        }
521    }
522    false
523}
524
/// Text types
///
/// Directional class assigned to a character by [`get_type`].
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Type {
    /// Right-to-left
    Rtl,
    /// Weak (affected relative to the direction of text flow)
    Weak,
    /// Neutral
    Neutral,
    /// Left-to-right
    Ltr,
}

538/// Get the type of a character
539pub fn get_type(c: &u16) -> Type {
540    if is_rtl(c) {
541        return Type::Rtl;
542    }
543    if is_neutral(c) {
544        return Type::Neutral;
545    }
546    if is_weak(c) {
547        return Type::Weak;
548    }
549    Type::Ltr
550}
551
552/// Find the dominant type in the string. skip past CtrChar until RTL or LTR is found.
553pub fn find_dominant_type(str: &[u16]) -> Type {
554    for c in str {
555        let t: Type = get_type(c);
556        if t == Type::Rtl || t == Type::Ltr {
557            return t;
558        }
559    }
560    Type::Ltr
561}
562
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn check_is_rtl() {
        assert!(is_rtl(&0x05C3));
        assert!(is_rtl(&0x05D0));
        assert!(!is_rtl(&0x01));
    }

    #[test]
    fn check_is_neutral() {
        // U+0020 SPACE falls in the 0x001C..=0x0022 range.
        assert!(is_neutral(&0x0020));
        assert!(!is_neutral(&0x0041));
    }

    #[test]
    fn check_is_weak() {
        // U+0030 DIGIT ZERO falls in the 0x002B..=0x003A range.
        assert!(is_weak(&0x0030));
        assert!(!is_weak(&0x0041));
    }

    #[test]
    fn check_get_type() {
        // `assert!(a == b)` is used because `Type` is only required to be `PartialEq`.
        assert!(get_type(&0x05D0) == Type::Rtl);
        assert!(get_type(&0x0020) == Type::Neutral);
        assert!(get_type(&0x0030) == Type::Weak);
        assert!(get_type(&0x0041) == Type::Ltr);
    }

    #[test]
    fn check_find_dominant_type() {
        assert!(find_dominant_type(&[0x0020, 0x0030, 0x05D0, 0x0041]) == Type::Rtl);
        assert!(find_dominant_type(&[0x0020, 0x0041, 0x05D0]) == Type::Ltr);
        assert!(find_dominant_type(&[0x0020, 0x0030]) == Type::Ltr);
        assert!(find_dominant_type(&[]) == Type::Ltr);
    }

    #[test]
    fn check_mirror_adjust_string() {
        let mut s: [u16; 4] = [40, 0x05D0, 41, 171];
        mirror_adjust_string(&mut s);
        assert_eq!(s, [41, 0x05D0, 40, 187]);

        let mut empty: [u16; 0] = [];
        mirror_adjust_string(&mut empty);
        assert_eq!(empty, []);
    }
}