allsorts_subset_browser/scripts/
indic.rs

1//! Implementation of font shaping for Indic scripts
2
3use log::debug;
4use unicode_general_category::GeneralCategory;
5
6use crate::error::{IndicError, ParseError, ShapingError};
7use crate::gsub::{self, FeatureMask, GlyphData, GlyphOrigin, RawGlyph, RawGlyphFlags};
8use crate::layout::{FeatureTableSubstitution, GDEFTable, LangSys, LayoutCache, LayoutTable, GSUB};
9use crate::scripts::syllable::*;
10use crate::tinyvec::tiny_vec;
11use crate::unicode::mcc::sort_by_modified_combining_class;
12use crate::{tag, DOTTED_CIRCLE};
13
/// The Indic scripts distinguished by this shaper.
///
/// The variant selects the per-script shaping parameters exposed by the
/// methods of `impl Script`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Script {
    Devanagari,
    Bengali,
    Gurmukhi,
    Gujarati,
    Oriya,
    Tamil,
    Telugu,
    Kannada,
    Malayalam,
    Sinhala,
}
27
/// Which consonant of a syllable is selected as the base consonant.
#[derive(Debug, Clone, Copy)]
enum BasePos {
    // First,
    /// Used by every script except Sinhala (see `Script::base_consonant_pos`).
    Last,
    /// Sinhala-specific variant of `Last`.
    LastSinhala,
}
34
/// How a script encodes its reph (see `Script::reph_mode`).
#[derive(Debug, Clone, Copy)]
enum RephMode {
    Explicit,
    Implicit,
    LogicalRepha,
    // VisualRepha,
}
42
/// Per-script mode for the `blwf` feature (see `Script::blwf_mode`).
#[derive(Debug, Clone, Copy, PartialEq)]
enum BlwfMode {
    PostOnly,
    PreAndPost,
}
48
49impl Script {
50    fn base_consonant_pos(self) -> BasePos {
51        match self {
52            Script::Devanagari => BasePos::Last,
53            Script::Bengali => BasePos::Last,
54            Script::Gurmukhi => BasePos::Last,
55            Script::Gujarati => BasePos::Last,
56            Script::Oriya => BasePos::Last,
57            Script::Tamil => BasePos::Last,
58            Script::Telugu => BasePos::Last,
59            Script::Kannada => BasePos::Last,
60            Script::Malayalam => BasePos::Last,
61            Script::Sinhala => BasePos::LastSinhala,
62        }
63    }
64
65    fn reph_position(self) -> Pos {
66        match self {
67            Script::Devanagari => Pos::BeforePost,
68            Script::Bengali => Pos::AfterSubjoined,
69            Script::Gurmukhi => Pos::BeforeSubjoined,
70            Script::Gujarati => Pos::BeforePost,
71            Script::Oriya => Pos::AfterMain,
72            Script::Tamil => Pos::AfterPost,
73            Script::Telugu => Pos::AfterPost,
74            Script::Kannada => Pos::AfterPost,
75            Script::Malayalam => Pos::AfterMain,
76            Script::Sinhala => Pos::AfterMain,
77        }
78    }
79
80    fn reph_mode(self) -> RephMode {
81        match self {
82            Script::Devanagari => RephMode::Implicit,
83            Script::Bengali => RephMode::Implicit,
84            Script::Gurmukhi => RephMode::Implicit,
85            Script::Gujarati => RephMode::Implicit,
86            Script::Oriya => RephMode::Implicit,
87            Script::Tamil => RephMode::Implicit,
88            Script::Telugu => RephMode::Explicit,
89            Script::Kannada => RephMode::Implicit,
90            Script::Malayalam => RephMode::LogicalRepha,
91            Script::Sinhala => RephMode::Explicit,
92        }
93    }
94
95    fn blwf_mode(self) -> BlwfMode {
96        match self {
97            Script::Devanagari => BlwfMode::PreAndPost,
98            Script::Bengali => BlwfMode::PreAndPost,
99            Script::Gurmukhi => BlwfMode::PreAndPost,
100            Script::Gujarati => BlwfMode::PreAndPost,
101            Script::Oriya => BlwfMode::PreAndPost,
102            Script::Tamil => BlwfMode::PreAndPost,
103            Script::Telugu => BlwfMode::PostOnly,
104            Script::Kannada => BlwfMode::PostOnly,
105            Script::Malayalam => BlwfMode::PreAndPost,
106            Script::Sinhala => BlwfMode::PreAndPost,
107        }
108    }
109
110    fn abovebase_matra_pos(self) -> Option<Pos> {
111        match self {
112            Script::Devanagari => Some(Pos::AfterSubjoined),
113            Script::Bengali => None,
114            Script::Gurmukhi => Some(Pos::AfterPost),
115            Script::Gujarati => Some(Pos::AfterSubjoined),
116            Script::Oriya => Some(Pos::AfterMain),
117            Script::Tamil => Some(Pos::AfterSubjoined),
118            Script::Telugu => Some(Pos::BeforeSubjoined),
119            Script::Kannada => Some(Pos::BeforeSubjoined),
120            Script::Malayalam => None,
121            Script::Sinhala => Some(Pos::AfterSubjoined),
122        }
123    }
124
125    fn rightside_matra_pos(self, ch: char) -> Option<Pos> {
126        match self {
127            Script::Devanagari => Some(Pos::AfterSubjoined),
128            Script::Bengali => Some(Pos::AfterPost),
129            Script::Gurmukhi => Some(Pos::AfterPost),
130            Script::Gujarati => Some(Pos::AfterPost),
131            Script::Oriya => Some(Pos::AfterPost),
132            Script::Tamil => Some(Pos::AfterPost),
133            Script::Telugu => match ch {
134                '\u{0C41}' => Some(Pos::BeforeSubjoined),
135                '\u{0C42}' => Some(Pos::BeforeSubjoined),
136                '\u{0C43}' => Some(Pos::AfterSubjoined),
137                '\u{0C44}' => Some(Pos::AfterSubjoined),
138                _ => None,
139            },
140            Script::Kannada => match ch {
141                '\u{0CBE}' => Some(Pos::BeforeSubjoined),
142                '\u{0CC0}' => Some(Pos::BeforeSubjoined),
143                '\u{0CC1}' => Some(Pos::BeforeSubjoined),
144                '\u{0CC2}' => Some(Pos::BeforeSubjoined),
145                '\u{0CC3}' => Some(Pos::AfterSubjoined),
146                '\u{0CC4}' => Some(Pos::AfterSubjoined),
147                '\u{0CD5}' => Some(Pos::AfterSubjoined),
148                '\u{0CD6}' => Some(Pos::AfterSubjoined),
149                _ => None,
150            },
151            Script::Malayalam => Some(Pos::AfterPost),
152            Script::Sinhala => Some(Pos::AfterSubjoined),
153        }
154    }
155
156    fn belowbase_matra_pos(self) -> Pos {
157        match self {
158            Script::Devanagari => Pos::AfterSubjoined,
159            Script::Bengali => Pos::AfterSubjoined,
160            Script::Gurmukhi => Pos::AfterPost,
161            Script::Gujarati => Pos::AfterPost,
162            Script::Oriya => Pos::AfterSubjoined,
163            Script::Tamil => Pos::AfterPost,
164            Script::Telugu => Pos::BeforeSubjoined,
165            Script::Kannada => Pos::BeforeSubjoined,
166            Script::Malayalam => Pos::AfterPost,
167            Script::Sinhala => Pos::AfterSubjoined,
168        }
169    }
170}
171
/// The two Indic shaping models this module distinguishes.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ShapingModel {
    Indic1,
    Indic2,
}
177
/// The basic shaping features handled by this module.
///
/// Each variant is named after the feature tag it maps to in
/// `BasicFeature::tag` (e.g. `Rphf` maps to `tag::RPHF`).
#[derive(Debug, Clone, Copy, PartialEq)]
enum BasicFeature {
    Locl,
    Nukt,
    Akhn,
    Rphf,
    Rkrf,
    Pref,
    Blwf,
    Abvf,
    Half,
    Pstf,
    Vatu,
    Cjct,
    Cfar,
}
194
195impl BasicFeature {
196    const ALL: &'static [BasicFeature] = &[
197        BasicFeature::Locl,
198        BasicFeature::Nukt,
199        BasicFeature::Akhn,
200        BasicFeature::Rphf,
201        BasicFeature::Rkrf,
202        BasicFeature::Pref,
203        BasicFeature::Blwf,
204        BasicFeature::Abvf,
205        BasicFeature::Half,
206        BasicFeature::Pstf,
207        BasicFeature::Vatu,
208        BasicFeature::Cjct,
209        BasicFeature::Cfar,
210    ];
211
212    fn tag(self) -> u32 {
213        match self {
214            BasicFeature::Locl => tag::LOCL,
215            BasicFeature::Nukt => tag::NUKT,
216            BasicFeature::Akhn => tag::AKHN,
217            BasicFeature::Rphf => tag::RPHF,
218            BasicFeature::Rkrf => tag::RKRF,
219            BasicFeature::Pref => tag::PREF,
220            BasicFeature::Blwf => tag::BLWF,
221            BasicFeature::Abvf => tag::ABVF,
222            BasicFeature::Half => tag::HALF,
223            BasicFeature::Pstf => tag::PSTF,
224            BasicFeature::Vatu => tag::VATU,
225            BasicFeature::Cjct => tag::CJCT,
226            BasicFeature::Cfar => tag::CFAR,
227        }
228    }
229
230    fn mask(self) -> FeatureMask {
231        match self {
232            BasicFeature::Locl => FeatureMask::LOCL,
233            BasicFeature::Nukt => FeatureMask::NUKT,
234            BasicFeature::Akhn => FeatureMask::AKHN,
235            BasicFeature::Rphf => FeatureMask::RPHF,
236            BasicFeature::Rkrf => FeatureMask::RKRF,
237            BasicFeature::Pref => FeatureMask::PREF,
238            BasicFeature::Blwf => FeatureMask::BLWF,
239            BasicFeature::Abvf => FeatureMask::ABVF,
240            BasicFeature::Half => FeatureMask::HALF,
241            BasicFeature::Pstf => FeatureMask::PSTF,
242            BasicFeature::Vatu => FeatureMask::VATU,
243            BasicFeature::Cjct => FeatureMask::CJCT,
244            BasicFeature::Cfar => FeatureMask::CFAR,
245        }
246    }
247
248    // Returns `true` if feature applies to the entire glyph buffer.
249    fn is_global(self) -> bool {
250        match self {
251            BasicFeature::Locl => true,
252            BasicFeature::Nukt => true,
253            BasicFeature::Akhn => true,
254            BasicFeature::Rphf => false,
255            BasicFeature::Rkrf => true,
256            BasicFeature::Pref => false,
257            BasicFeature::Blwf => false,
258            BasicFeature::Abvf => true,
259            BasicFeature::Half => false,
260            BasicFeature::Pstf => false,
261            BasicFeature::Vatu => true,
262            BasicFeature::Cjct => true,
263            BasicFeature::Cfar => true,
264        }
265    }
266}
267
/// Shaping class of a character, as reported by `shaping_class`.
///
/// The character predicates in this module (`consonant`, `matra`, ...) are
/// defined in terms of these classes.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ShapingClass {
    Bindu,
    Visarga,
    Avagraha,
    Nukta,
    Virama,
    Cantillation,
    GeminationMark,
    PureKiller,
    SyllableModifier,
    Consonant,
    VowelIndependent,
    VowelDependent,
    ConsonantDead,
    ConsonantMedial,
    ConsonantPlaceholder,
    ConsonantWithStacker,
    ConsonantPreRepha,
    ModifyingLetter,
    Placeholder,
    Number,
    Symbol,
    Joiner,
    NonJoiner,
    DottedCircle,
}
295
/// Placement of a mark relative to its base, named after the side(s) it
/// occupies.
#[derive(Debug, Clone, Copy)]
enum MarkPlacementSubclass {
    TopPosition,
    RightPosition,
    BottomPosition,
    LeftPosition,
    LeftAndRightPosition,
    TopAndRightPosition,
    TopAndLeftPosition,
    TopLeftAndRightPosition,
    TopAndBottomPosition,
    Overstruck,
}
309
/// Position category of a glyph within a syllable.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so the variant
/// order below is significant: earlier variants compare as smaller.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Pos {
    RaToBecomeReph,
    PrebaseMatra,
    PrebaseConsonant,
    SyllableBase,
    AfterMain,
    _AbovebaseConsonant,
    BeforeSubjoined,
    BelowbaseConsonant,
    AfterSubjoined,
    BeforePost,
    PostbaseConsonant,
    AfterPost,
    _FinalConsonant,
    SMVD,
}
327
328/////////////////////////////////////////////////////////////////////////////
329// Syllable state machine
330/////////////////////////////////////////////////////////////////////////////
331
/// Kind of syllable recognised by `match_syllable`.
#[derive(Debug, Clone, Copy)]
enum Syllable {
    Consonant,
    Vowel,
    Standalone,
    Symbol,
    Broken,
}
340
341fn shaping_class(ch: char) -> Option<ShapingClass> {
342    let (shaping, _) = indic_character(ch);
343    shaping
344}
345
346fn consonant(ch: char) -> bool {
347    match shaping_class(ch) {
348        Some(ShapingClass::Consonant) => !ra(ch),
349        Some(ShapingClass::ConsonantDead) => true,
350        _ => false,
351    }
352}
353
354fn vowel(ch: char) -> bool {
355    match shaping_class(ch) {
356        Some(ShapingClass::VowelIndependent) => true,
357        _ => false,
358    }
359}
360
361fn nukta(ch: char) -> bool {
362    match shaping_class(ch) {
363        Some(ShapingClass::Nukta) => true,
364        _ => false,
365    }
366}
367
368fn halant(ch: char) -> bool {
369    match shaping_class(ch) {
370        Some(ShapingClass::Virama) => true,
371        _ => false,
372    }
373}
374
375fn zwj(ch: char) -> bool {
376    match shaping_class(ch) {
377        Some(ShapingClass::Joiner) => true,
378        _ => false,
379    }
380}
381
382fn zwnj(ch: char) -> bool {
383    match shaping_class(ch) {
384        Some(ShapingClass::NonJoiner) => true,
385        _ => false,
386    }
387}
388
389fn joiner(ch: char) -> bool {
390    zwj(ch) || zwnj(ch)
391}
392
/// Returns `true` if `ch` is the letter Ra of one of the supported scripts.
fn ra(ch: char) -> bool {
    matches!(
        ch,
        '\u{0930}' // Devanagari
        | '\u{09B0}' // Bengali
        | '\u{09F0}' // Bengali, Assamese
        | '\u{0A30}' // Gurmukhi
        | '\u{0AB0}' // Gujarati
        | '\u{0B30}' // Oriya
        | '\u{0BB0}' // Tamil
        | '\u{0C30}' // Telugu
        | '\u{0CB0}' // Kannada
        | '\u{0D30}' // Malayalam
        | '\u{0DBB}' // Sinhala
    )
}
409
410fn matra(ch: char) -> bool {
411    match shaping_class(ch) {
412        Some(ShapingClass::VowelDependent) => true,
413        Some(ShapingClass::PureKiller) => true,
414        _ => false,
415    }
416}
417
418fn syllable_modifier(ch: char) -> bool {
419    match shaping_class(ch) {
420        Some(ShapingClass::SyllableModifier) => true,
421        Some(ShapingClass::Bindu) => true,
422        Some(ShapingClass::Visarga) => true,
423        Some(ShapingClass::GeminationMark) => true,
424        _ => false,
425    }
426}
427
428fn vedic_sign(ch: char) -> bool {
429    match shaping_class(ch) {
430        Some(ShapingClass::Cantillation) => true,
431        _ => false,
432    }
433}
434
435fn placeholder(ch: char) -> bool {
436    match shaping_class(ch) {
437        Some(ShapingClass::Number) => true,
438        Some(ShapingClass::Placeholder) => true,
439        Some(ShapingClass::ConsonantPlaceholder) => true,
440        _ => false,
441    }
442}
443
444fn dotted_circle(ch: char) -> bool {
445    match shaping_class(ch) {
446        Some(ShapingClass::DottedCircle) => true,
447        _ => false,
448    }
449}
450
451fn repha(ch: char) -> bool {
452    match shaping_class(ch) {
453        Some(ShapingClass::ConsonantPreRepha) => true,
454        _ => false,
455    }
456}
457
458fn consonant_medial(ch: char) -> bool {
459    match shaping_class(ch) {
460        Some(ShapingClass::ConsonantMedial) => true,
461        _ => false,
462    }
463}
464
465fn symbol(ch: char) -> bool {
466    match shaping_class(ch) {
467        Some(ShapingClass::Symbol) => true,
468        Some(ShapingClass::Avagraha) => true,
469        _ => false,
470    }
471}
472
473fn consonant_with_stacker(ch: char) -> bool {
474    match shaping_class(ch) {
475        Some(ShapingClass::ConsonantWithStacker) => true,
476        _ => false,
477    }
478}
479
480#[allow(dead_code)]
481fn other(ch: char) -> bool {
482    match shaping_class(ch) {
483        Some(ShapingClass::ModifyingLetter) => true,
484        _ => false,
485    }
486}
487
488fn match_c<T: SyllableChar>(cs: &[T]) -> Option<usize> {
489    match_either(match_one(consonant), match_one(ra))(cs)
490}
491
492fn match_z<T: SyllableChar>(cs: &[T]) -> Option<usize> {
493    match_one(joiner)(cs)
494}
495
496fn match_reph<T: SyllableChar>(cs: &[T]) -> Option<usize> {
497    match_either(
498        match_seq(match_one(ra), match_one(halant)),
499        match_one(repha),
500    )(cs)
501}
502
503fn match_cn<T: SyllableChar>(cs: &[T]) -> Option<usize> {
504    match_seq(
505        match_c,
506        match_optional_seq(match_one(zwj), match_optional(match_one(nukta))),
507    )(cs)
508}
509
510fn match_forced_rakar<T: SyllableChar>(cs: &[T]) -> Option<usize> {
511    match_seq(
512        match_one(zwj),
513        match_seq(match_one(halant), match_seq(match_one(zwj), match_one(ra))),
514    )(cs)
515}
516
517fn match_s<T: SyllableChar>(cs: &[T]) -> Option<usize> {
518    match_seq(match_one(symbol), match_optional(match_one(nukta)))(cs)
519}
520
521fn match_matra_group<T: SyllableChar>(cs: &[T]) -> Option<usize> {
522    match_repeat_upto(
523        3,
524        match_z,
525        match_seq(
526            match_one(matra),
527            match_optional_seq(
528                match_one(nukta),
529                match_optional(match_either(match_one(halant), match_forced_rakar)),
530            ),
531        ),
532    )(cs)
533}
534
535fn match_syllable_tail<T: SyllableChar>(cs: &[T]) -> Option<usize> {
536    match_optional_seq(
537        match_optional_seq(
538            match_z,
539            match_seq(
540                match_one(syllable_modifier),
541                match_optional_seq(
542                    match_one(syllable_modifier),
543                    match_optional(match_one(zwnj)),
544                ),
545            ),
546        ),
547        match_repeat_upto(3, match_one(vedic_sign), match_unit()),
548    )(cs)
549}
550
551fn match_halant_group<T: SyllableChar>(cs: &[T]) -> Option<usize> {
552    match_optional_seq(
553        match_z,
554        match_seq(
555            match_one(halant),
556            match_optional(match_seq(match_one(zwj), match_optional(match_one(nukta)))),
557        ),
558    )(cs)
559}
560
561// This is not used as we expand it inline
562/*
563fn match_final_halant_group<T: SyllableChar>(cs: &[T]) -> Option<usize> {
564    match_either(
565        match_halant_group,
566        match_seq(match_one(halant), match_one(zwnj)),
567    )(cs)
568}
569*/
570
571fn match_medial_group<T: SyllableChar>(cs: &[T]) -> Option<usize> {
572    match_optional(match_one(consonant_medial))(cs)
573}
574
575fn match_halant_or_matra_group<T: SyllableChar>(cs: &[T]) -> Option<usize> {
576    // this can match a short sequence so we expand and reorder it
577    match_either(
578        match_seq(match_one(halant), match_one(zwnj)),
579        // Currently deviates from spec. See:
580        // https://github.com/n8willis/opentype-shaping-documents/issues/72
581        match_either(
582            match_repeat_upto(4, match_matra_group, match_unit()),
583            match_halant_group,
584        ),
585    )(cs)
586}
587
588fn match_consonant_syllable<T: SyllableChar>(cs: &[T]) -> Option<usize> {
589    match_optional_seq(
590        match_either(match_one(repha), match_one(consonant_with_stacker)),
591        match_repeat_upto(
592            4,
593            match_seq(match_cn, match_halant_group),
594            match_seq(
595                match_cn,
596                match_seq(
597                    match_medial_group,
598                    match_seq(match_halant_or_matra_group, match_syllable_tail),
599                ),
600            ),
601        ),
602    )(cs)
603}
604
605fn match_vowel_syllable<T: SyllableChar>(cs: &[T]) -> Option<usize> {
606    match_optional_seq(
607        match_reph,
608        match_seq(
609            match_one(vowel),
610            match_optional_seq(
611                match_one(nukta),
612                match_either(
613                    match_one(zwj),
614                    match_repeat_upto(
615                        4,
616                        match_seq(match_halant_group, match_cn),
617                        match_seq(
618                            match_medial_group,
619                            match_seq(match_halant_or_matra_group, match_syllable_tail),
620                        ),
621                    ),
622                ),
623            ),
624        ),
625    )(cs)
626}
627
628fn match_standalone_syllable<T: SyllableChar>(cs: &[T]) -> Option<usize> {
629    match_either_seq(
630        match_optional_seq(
631            match_either(match_one(repha), match_one(consonant_with_stacker)),
632            match_one(placeholder),
633        ),
634        match_seq(match_optional(match_reph), match_one(dotted_circle)),
635        match_optional_seq(
636            match_one(nukta),
637            match_repeat_upto(
638                4,
639                match_seq(match_halant_group, match_cn),
640                match_seq(
641                    match_medial_group,
642                    match_seq(match_halant_or_matra_group, match_syllable_tail),
643                ),
644            ),
645        ),
646    )(cs)
647}
648
649fn match_symbol_syllable<T: SyllableChar>(cs: &[T]) -> Option<usize> {
650    match_seq(match_s, match_syllable_tail)(cs)
651}
652
653fn match_broken_syllable<T: SyllableChar>(cs: &[T]) -> Option<usize> {
654    match_nonempty(match_optional_seq(
655        match_reph,
656        match_optional_seq(
657            match_one(nukta),
658            match_repeat_upto(
659                4,
660                match_seq(match_halant_group, match_cn),
661                match_seq(
662                    match_medial_group,
663                    match_seq(match_halant_or_matra_group, match_syllable_tail),
664                ),
665            ),
666        ),
667    ))(cs)
668}
669
670fn match_syllable<T: SyllableChar>(cs: &[T]) -> Option<(usize, Syllable)> {
671    let consonant = (match_consonant_syllable(cs), Syllable::Consonant);
672    let vowel = (match_vowel_syllable(cs), Syllable::Vowel);
673    let standalone = (match_standalone_syllable(cs), Syllable::Standalone);
674    let symbol = (match_symbol_syllable(cs), Syllable::Symbol);
675    let broken = (match_broken_syllable(cs), Syllable::Broken);
676
677    // To prevent incorrect splitting (and mis-categorisation) of a syllable,
678    // greediest syllable match, wins. In the event of a tie, precedence is
679    // consonant > vowel > standalone > symbol > broken
680    let syllables = &mut [consonant, vowel, standalone, symbol, broken];
681    syllables.sort_by(|(len1, _), (len2, _)| len2.cmp(len1));
682
683    match syllables[0] {
684        (Some(len), syllable_type) => Some((len, syllable_type)),
685        (None, _) => None,
686    }
687}
688
689/////////////////////////////////////////////////////////////////////////////
690// Preprocessing
691/////////////////////////////////////////////////////////////////////////////
692
693/// Preprocess Indic character sequences. This function should be called
694/// prior to mapping Indic characters to their corresponding glyphs.
695pub(super) fn preprocess_indic(cs: &mut Vec<char>, script_tag: u32) {
696    let script = script(script_tag);
697
698    constrain_vowel(cs);
699    decompose_matra(cs);
700    sort_by_modified_combining_class(cs);
701    if script == Script::Bengali {
702        recompose_bengali_ya_nukta(cs);
703    } else if script == Script::Kannada {
704        reorder_kannada_ra_halant_zwj(cs);
705    }
706}
707
/// Denotes if/where a constraining character should be inserted.
///
/// Produced by `vowel_constraint` for a pair of adjacent characters and
/// acted upon by `constrain_vowel`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InsertConstraint {
    /// Insert a constraining character between a pair of characters.
    Between,
    /// Insert a constraining character after a pair of characters if
    /// the `char` immediately after the pair equals the `char` contained
    /// in `MaybeAfter`.
    MaybeAfter(char),
    /// Do not insert a constraining character.
    None,
}
719
720/// Prohibit vowel combinations that look like other vowels by inserting
721/// a constraining character in between these combinations.
722///
723/// E.g. Bengali Letter A + Bengali Sign Aa looks like Bengali Letter Aa.
724fn constrain_vowel(cs: &mut Vec<char>) {
725    let mut i = 0;
726    while i + 1 < cs.len() {
727        i += match vowel_constraint(cs[i], cs[i + 1]) {
728            InsertConstraint::Between => {
729                cs.insert(i + 1, DOTTED_CIRCLE);
730                3
731            }
732            InsertConstraint::MaybeAfter(c3) => {
733                if i + 2 < cs.len() && cs[i + 2] == c3 {
734                    cs.insert(i + 2, DOTTED_CIRCLE);
735                    4
736                } else {
737                    2
738                }
739            }
740            InsertConstraint::None => 1,
741        }
742    }
743}
744
745/// See the following link for the full list of prohibited vowel combinations:
746///
747/// https://docs.microsoft.com/en-us/typography/script-development/use#independent-vowel-iv-plus-dependent-vowel-constraints-dv
748fn vowel_constraint(c1: char, c2: char) -> InsertConstraint {
749    match (c1, c2) {
750        // Devanagari
751        ('\u{0905}', '\u{0946}') => InsertConstraint::Between,
752        ('\u{0905}', '\u{093E}') => InsertConstraint::Between,
753        ('\u{0909}', '\u{0941}') => InsertConstraint::Between,
754        ('\u{090F}', '\u{0945}') => InsertConstraint::Between,
755        ('\u{090F}', '\u{0946}') => InsertConstraint::Between,
756        ('\u{090F}', '\u{0947}') => InsertConstraint::Between,
757        ('\u{0905}', '\u{0949}') => InsertConstraint::Between,
758        ('\u{0906}', '\u{0945}') => InsertConstraint::Between,
759        ('\u{0905}', '\u{094A}') => InsertConstraint::Between,
760        ('\u{0906}', '\u{0946}') => InsertConstraint::Between,
761        ('\u{0905}', '\u{094B}') => InsertConstraint::Between,
762        ('\u{0906}', '\u{0947}') => InsertConstraint::Between,
763        ('\u{0905}', '\u{094C}') => InsertConstraint::Between,
764        ('\u{0906}', '\u{0948}') => InsertConstraint::Between,
765        ('\u{0905}', '\u{0945}') => InsertConstraint::Between,
766        ('\u{0905}', '\u{093A}') => InsertConstraint::Between,
767        ('\u{0905}', '\u{093B}') => InsertConstraint::Between,
768        ('\u{0906}', '\u{093A}') => InsertConstraint::Between,
769        ('\u{0905}', '\u{094F}') => InsertConstraint::Between,
770        ('\u{0905}', '\u{0956}') => InsertConstraint::Between,
771        ('\u{0905}', '\u{0957}') => InsertConstraint::Between,
772        // Devanagari "Reph, Letter I"
773        ('\u{0930}', '\u{094D}') => InsertConstraint::MaybeAfter('\u{0907}'),
774        // Bengali
775        ('\u{0985}', '\u{09BE}') => InsertConstraint::Between,
776        ('\u{098B}', '\u{09C3}') => InsertConstraint::Between,
777        ('\u{098C}', '\u{09E2}') => InsertConstraint::Between,
778        // Gurmukhi
779        ('\u{0A05}', '\u{0A3E}') => InsertConstraint::Between,
780        ('\u{0A72}', '\u{0A3F}') => InsertConstraint::Between,
781        ('\u{0A72}', '\u{0A40}') => InsertConstraint::Between,
782        ('\u{0A73}', '\u{0A41}') => InsertConstraint::Between,
783        ('\u{0A73}', '\u{0A42}') => InsertConstraint::Between,
784        ('\u{0A72}', '\u{0A47}') => InsertConstraint::Between,
785        ('\u{0A05}', '\u{0A48}') => InsertConstraint::Between,
786        ('\u{0A73}', '\u{0A4B}') => InsertConstraint::Between,
787        ('\u{0A05}', '\u{0A4C}') => InsertConstraint::Between,
788        // Gujarati
789        ('\u{0A85}', '\u{0ABE}') => InsertConstraint::Between,
790        ('\u{0A85}', '\u{0AC5}') => InsertConstraint::Between,
791        ('\u{0A85}', '\u{0AC7}') => InsertConstraint::Between,
792        ('\u{0A85}', '\u{0AC8}') => InsertConstraint::Between,
793        ('\u{0A85}', '\u{0AC9}') => InsertConstraint::Between,
794        ('\u{0A85}', '\u{0ACB}') => InsertConstraint::Between,
795        ('\u{0A85}', '\u{0ACC}') => InsertConstraint::Between,
796        ('\u{0AC5}', '\u{0ABE}') => InsertConstraint::Between,
797        // For the Gujarati triplets:
798        //   * ('\u{0A85}', '\u{0ABE}', '\u{0AC5}')
799        //   * ('\u{0A85}', '\u{0ABE}', '\u{0AC8}')
800        // the constraining character is inserted between the
801        // first two characters, and are therefore covered by
802        // the ('\u{0A85}', '\u{0ABE}') arm
803        // Oriya
804        ('\u{0B05}', '\u{0B3E}') => InsertConstraint::Between,
805        ('\u{0B0F}', '\u{0B57}') => InsertConstraint::Between,
806        ('\u{0B13}', '\u{0B57}') => InsertConstraint::Between,
807        // Telugu
808        ('\u{0C12}', '\u{0C55}') => InsertConstraint::Between,
809        ('\u{0C12}', '\u{0C4C}') => InsertConstraint::Between,
810        ('\u{0C3F}', '\u{0C55}') => InsertConstraint::Between,
811        ('\u{0C46}', '\u{0C55}') => InsertConstraint::Between,
812        ('\u{0C4A}', '\u{0C55}') => InsertConstraint::Between,
813        // Kannada
814        ('\u{0C89}', '\u{0CBE}') => InsertConstraint::Between,
815        ('\u{0C92}', '\u{0CCC}') => InsertConstraint::Between,
816        ('\u{0C8B}', '\u{0CBE}') => InsertConstraint::Between,
817        // Malayalam
818        ('\u{0D07}', '\u{0D57}') => InsertConstraint::Between,
819        ('\u{0D09}', '\u{0D57}') => InsertConstraint::Between,
820        ('\u{0D0E}', '\u{0D46}') => InsertConstraint::Between,
821        ('\u{0D12}', '\u{0D3E}') => InsertConstraint::Between,
822        ('\u{0D12}', '\u{0D57}') => InsertConstraint::Between,
823        // Sinhala
824        ('\u{0D85}', '\u{0DCF}') => InsertConstraint::Between,
825        ('\u{0D85}', '\u{0DD0}') => InsertConstraint::Between,
826        ('\u{0D85}', '\u{0DD1}') => InsertConstraint::Between,
827        ('\u{0D8B}', '\u{0DDF}') => InsertConstraint::Between,
828        ('\u{0D8D}', '\u{0DD8}') => InsertConstraint::Between,
829        ('\u{0D8F}', '\u{0DDF}') => InsertConstraint::Between,
830        ('\u{0D91}', '\u{0DCA}') => InsertConstraint::Between,
831        ('\u{0D91}', '\u{0DD9}') => InsertConstraint::Between,
832        ('\u{0D91}', '\u{0DDA}') => InsertConstraint::Between,
833        ('\u{0D91}', '\u{0DDC}') => InsertConstraint::Between,
834        ('\u{0D91}', '\u{0DDD}') => InsertConstraint::Between,
835        ('\u{0D94}', '\u{0DDF}') => InsertConstraint::Between,
836        // Brahmi
837        // Takri
838        // Khudawadi
839        // Tirhuta
840        // Modi
841        _ => InsertConstraint::None,
842    }
843}
844
/// A multi-part matra's constituent parts.
///
/// Produced by `split_matra` and consumed by `decompose_matra`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatraSplit {
    /// Not a multi-part matra.
    None,
    /// Two-part matra.
    Two(char, char),
    /// Three-part matra.
    Three(char, char, char),
}
854
855/// Decompose two- or three-part matras, as certain parts may be placed
856/// in different positions relative to the base.
857///
858/// E.g. Bengali "Ka, Sign O" decomposes into "Ka, Sign E, Sign Aa", then
859/// gets reordered to "Sign E, Ka, Sign Aa".
860fn decompose_matra(cs: &mut Vec<char>) {
861    let mut i = 0;
862    while i < cs.len() {
863        i += match split_matra(cs[i]) {
864            MatraSplit::None => 1,
865            MatraSplit::Two(c1, c2) => {
866                cs[i] = c1;
867                cs.insert(i + 1, c2);
868                2
869            }
870            MatraSplit::Three(c1, c2, c3) => {
871                cs[i] = c1;
872                cs.insert(i + 1, c2);
873                cs.insert(i + 2, c3);
874                3
875            }
876        }
877    }
878}
879
880fn split_matra(ch: char) -> MatraSplit {
881    match ch {
882        // Devanagari
883        // Bengali
884        '\u{09CB}' => MatraSplit::Two('\u{09C7}', '\u{09BE}'),
885        '\u{09CC}' => MatraSplit::Two('\u{09C7}', '\u{09D7}'),
886        // Gurmukhi
887        // Gujarati
888        // Oriya
889        '\u{0B48}' => MatraSplit::Two('\u{0B47}', '\u{0B56}'),
890        '\u{0B4B}' => MatraSplit::Two('\u{0B47}', '\u{0B3E}'),
891        '\u{0B4C}' => MatraSplit::Two('\u{0B47}', '\u{0B57}'),
892        // Tamil
893        '\u{0BCA}' => MatraSplit::Two('\u{0BC6}', '\u{0BBE}'),
894        '\u{0BCB}' => MatraSplit::Two('\u{0BC7}', '\u{0BBE}'),
895        '\u{0BCC}' => MatraSplit::Two('\u{0BC6}', '\u{0BD7}'),
896        // Telugu
897        '\u{0C48}' => MatraSplit::Two('\u{0C46}', '\u{0C56}'),
898        // Kannada
899        '\u{0CC0}' => MatraSplit::Two('\u{0CBF}', '\u{0CD5}'),
900        '\u{0CC7}' => MatraSplit::Two('\u{0CC6}', '\u{0CD5}'),
901        '\u{0CC8}' => MatraSplit::Two('\u{0CC6}', '\u{0CD6}'),
902        '\u{0CCA}' => MatraSplit::Two('\u{0CC6}', '\u{0CC2}'),
903        '\u{0CCB}' => MatraSplit::Three('\u{0CC6}', '\u{0CC2}', '\u{0CD5}'),
904        // Malayalam
905        '\u{0D4A}' => MatraSplit::Two('\u{0D46}', '\u{0D3E}'),
906        '\u{0D4B}' => MatraSplit::Two('\u{0D47}', '\u{0D3E}'),
907        '\u{0D4C}' => MatraSplit::Two('\u{0D46}', '\u{0D57}'),
908        // Sinhala
909        '\u{0DDA}' => MatraSplit::Two('\u{0DD9}', '\u{0DCA}'),
910        '\u{0DDC}' => MatraSplit::Two('\u{0DD9}', '\u{0DCF}'),
911        '\u{0DDD}' => MatraSplit::Three('\u{0DD9}', '\u{0DCF}', '\u{0DCA}'),
912        '\u{0DDE}' => MatraSplit::Two('\u{0DD9}', '\u{0DDF}'),
913        _ => MatraSplit::None,
914    }
915}
916
/// Collapses every Bengali "Ya, Nukta" pair in `cs` into the single
/// precomposed "Yya" character, in place.
///
/// This mirrors what HarfBuzz does.
///
/// https://github.com/n8willis/opentype-shaping-documents/issues/74
fn recompose_bengali_ya_nukta(cs: &mut Vec<char>) {
    const YA: char = '\u{09AF}';
    const NUKTA: char = '\u{09BC}';
    const YYA: char = '\u{09DF}';

    // `dedup_by` hands the closure each element together with the element
    // retained just before it; returning `true` drops the later element.
    cs.dedup_by(|current, previous| {
        let is_ya_nukta = *previous == YA && *current == NUKTA;
        if is_ya_nukta {
            *previous = YYA;
        }
        is_ya_nukta
    });
}
932
/// Rewrites a leading Kannada "Ra, Halant, ZWJ" into "Ra, ZWJ, Halant".
///
/// Legacy Kannada text uses the first order, but it has to behave like the
/// second so that a consonant following the "ZWJ" (i.e. "Ra, Halant, ZWJ,
/// Consonant") takes on a subjoined form. Only the first three characters of
/// `cs` are examined; any other input is left untouched.
///
/// https://github.com/n8willis/opentype-shaping-documents/issues/61
/// https://github.com/harfbuzz/harfbuzz/issues/435
fn reorder_kannada_ra_halant_zwj(cs: &mut [char]) {
    // Ra, Halant, ZWJ
    const LEGACY_PREFIX: [char; 3] = ['\u{0CB0}', '\u{0CCD}', '\u{200D}'];

    let has_legacy_prefix = cs.get(..LEGACY_PREFIX.len()) == Some(&LEGACY_PREFIX[..]);
    if has_legacy_prefix {
        // Exchange the Halant and the ZWJ.
        cs.swap(1, 2);
    }
}
944
945/////////////////////////////////////////////////////////////////////////////
946// Shaping
947/////////////////////////////////////////////////////////////////////////////
948
/// Per-glyph data attached to each glyph while it is being shaped by the
/// Indic shaper (see `RawGlyphIndic`).
#[derive(Clone)]
struct IndicData {
    /// Positioning tag of the glyph within its syllable; `None` while the
    /// glyph has not been tagged.
    pos: Option<Pos>,
    /// Feature mask bits currently set on the glyph.
    mask: FeatureMask,
}
954
955impl GlyphData for IndicData {
956    /// Merge semantics for IndicData. The values that get used in the merged
957    /// glyph are the values belonging to the glyph with the higher merge
958    /// precedence.
959    ///
960    /// Merge precedence:
961    ///   1. SyllableBase
962    ///   2. PrebaseConsonant
963    ///   3. PostbaseConsonant (in practice, there should never be a situation
964    ///      where a PostbaseConsonant glyph is merged into a PrebaseConsonant glyph)
965    ///   4. !None
966    ///   5. None (shouldn't happen - all glyphs should be tagged by this point)
967    fn merge(data1: IndicData, data2: IndicData) -> IndicData {
968        match (data1.pos, data2.pos) {
969            (Some(Pos::SyllableBase), _) => data1,
970            (_, Some(Pos::SyllableBase)) => data2,
971            (Some(Pos::PrebaseConsonant), _) => data1,
972            (_, Some(Pos::PrebaseConsonant)) => data2,
973            (Some(Pos::PostbaseConsonant), _) => data1,
974            (_, Some(Pos::PostbaseConsonant)) => data2,
975            (_, None) => data1,
976            (None, _) => data2,
977            _ => data1, // Default
978        }
979    }
980}
981
982type RawGlyphIndic = RawGlyph<IndicData>;
983
984impl RawGlyphIndic {
985    fn is(&self, pred: impl FnOnce(char) -> bool) -> bool {
986        match self.glyph_origin {
987            GlyphOrigin::Char(c) => pred(c),
988            GlyphOrigin::Direct => false,
989        }
990    }
991
992    fn has_pos(&self, pos: Pos) -> bool {
993        match self.extra_data.pos {
994            Some(p) => p == pos,
995            None => false,
996        }
997    }
998
999    fn set_pos(&mut self, pos: Option<Pos>) {
1000        self.extra_data.pos = pos
1001    }
1002
1003    fn replace_none_pos(&mut self, pos: Option<Pos>) {
1004        assert_eq!(self.extra_data.pos, None);
1005        self.set_pos(pos)
1006    }
1007
1008    fn pos(&self) -> Option<Pos> {
1009        self.extra_data.pos
1010    }
1011
1012    fn has_mask(&self, mask: FeatureMask) -> bool {
1013        self.extra_data.mask.contains(mask)
1014    }
1015
1016    fn add_mask(&mut self, mask: FeatureMask) {
1017        self.extra_data.mask.insert(mask)
1018    }
1019
1020    fn remove_mask(&mut self, mask: FeatureMask) {
1021        self.extra_data.mask.remove(mask)
1022    }
1023}
1024
/// Everything the per-syllable shaping steps need, bundled so it can be
/// passed around as a single reference. Built once per call in
/// `gsub_apply_indic`.
struct IndicShapingData<'tables> {
    /// GSUB layout cache handed to the `gsub` helpers.
    gsub_cache: &'tables LayoutCache<GSUB>,
    /// GSUB table handed to the `gsub` helpers.
    gsub_table: &'tables LayoutTable<GSUB>,
    /// GDEF table, if one was supplied by the caller.
    gdef_table: Option<&'tables GDEFTable>,
    /// Language system selected from the script table.
    langsys: &'tables LangSys,
    /// Script tag that was actually found in the GSUB table: the Indic2 tag
    /// when the font has it, otherwise the Indic1 tag.
    script_tag: u32,
    /// Language tag supplied by the caller, if any.
    lang_tag: Option<u32>,
    /// Script derived from the Indic1 tag.
    script: Script,
    /// Shaping model matching `script_tag` (Indic1 or Indic2).
    shaping_model: ShapingModel,
    /// Feature table substitutions supplied by the caller, if any.
    feature_variations: Option<&'tables FeatureTableSubstitution<'tables>>,
}
1036
impl IndicShapingData<'_> {
    /// Asks `gsub::gsub_feature_would_apply` whether the feature
    /// `feature_tag` would apply to `glyphs` at `start_index`, supplying the
    /// stored cache, tables, language system and feature variations.
    fn feature_would_apply(
        &self,
        feature_tag: u32,
        glyphs: &[RawGlyphIndic],
        start_index: usize,
    ) -> Result<bool, ParseError> {
        gsub::gsub_feature_would_apply(
            self.gsub_cache,
            self.gsub_table,
            self.gdef_table,
            self.langsys,
            self.feature_variations,
            feature_tag,
            glyphs,
            start_index,
        )
    }

    /// Forwards to `gsub::get_lookups_cache_index` for the feature `mask`,
    /// supplying the stored cache, script tag, language tag and feature
    /// variations.
    fn get_lookups_cache_index(&self, mask: FeatureMask) -> Result<usize, ParseError> {
        gsub::get_lookups_cache_index(
            self.gsub_cache,
            self.script_tag,
            self.lang_tag,
            self.feature_variations,
            mask,
        )
    }

    /// Forwards to `gsub::gsub_apply_lookup` for the lookup `lookup_index`
    /// of feature `feature_tag`, passing `pred` through unchanged.
    ///
    /// The value returned by the helper is discarded; only errors are
    /// propagated.
    fn apply_lookup(
        &self,
        lookup_index: usize,
        feature_tag: u32,
        glyphs: &mut Vec<RawGlyphIndic>,
        pred: impl Fn(&RawGlyphIndic) -> bool,
    ) -> Result<(), ParseError> {
        gsub::gsub_apply_lookup(
            self.gsub_cache,
            self.gsub_table,
            self.gdef_table,
            lookup_index,
            feature_tag,
            None,
            glyphs,
            // NOTE(review): `0` and `glyphs.len()` presumably select the whole
            // buffer (start and length) - confirm against `gsub_apply_lookup`.
            0,
            glyphs.len(),
            pred,
        )?;
        Ok(())
    }
}
1088
1089/// Does the following:
1090///   * Splits syllables
1091///   * Inserts dotted circles into broken syllables
1092///   * Initial reordering
1093///   * Applies basic features
1094///   * Final reordering
1095///   * Applies presentation features
1096pub fn gsub_apply_indic<'a>(
1097    dotted_circle_index: u16,
1098    gsub_cache: &'a LayoutCache<GSUB>,
1099    gsub_table: &'a LayoutTable<GSUB>,
1100    gdef_table: Option<&'a GDEFTable>,
1101    indic1_tag: u32,
1102    lang_tag: Option<u32>,
1103    feature_variations: Option<&'a FeatureTableSubstitution<'a>>,
1104    glyphs: &mut Vec<RawGlyph<()>>,
1105) -> Result<(), ShapingError> {
1106    if glyphs.is_empty() {
1107        return Err(IndicError::EmptyBuffer.into());
1108    }
1109
1110    // Currently, the script tag that gets passed from Mercury is the Indic1 tag.
1111    // Map this to the Indic2 tag, as we want to check if a font supports it
1112    let indic2_tag = indic2_tag(indic1_tag);
1113
1114    // Priority: Indic2 > Indic1 > Default
1115    let (script_tag, shaping_model, script_table) = match gsub_table.find_script(indic2_tag)? {
1116        Some(script_table) => (indic2_tag, ShapingModel::Indic2, script_table),
1117        None => match gsub_table.find_script_or_default(indic1_tag)? {
1118            Some(script_table) => (indic1_tag, ShapingModel::Indic1, script_table),
1119            None => return Ok(()),
1120        },
1121    };
1122
1123    let langsys = match script_table.find_langsys_or_default(lang_tag)? {
1124        Some(langsys) => langsys,
1125        None => return Ok(()),
1126    };
1127
1128    let mut syllables = to_indic_syllables(glyphs);
1129    let script = script(indic1_tag);
1130    let shaping_data = IndicShapingData {
1131        gsub_cache,
1132        gsub_table,
1133        gdef_table,
1134        langsys,
1135        script_tag,
1136        lang_tag,
1137        script,
1138        shaping_model,
1139        feature_variations,
1140    };
1141
1142    for i in 0..syllables.len() {
1143        // For application of INIT. If a left matra is not word-initial,
1144        // HarfBuzz applies INIT iff the preceding character falls outside
1145        // a range of GeneralCategory classes. We follow suit.
1146        let is_first_syllable = if i == 0 {
1147            true
1148        } else if let Some(prev_glyph) = syllables[i - 1].0.iter().last() {
1149            match prev_glyph.glyph_origin {
1150                GlyphOrigin::Char(c) => {
1151                    let gc = unicode_general_category::get_general_category(c);
1152                    !(gc == GeneralCategory::Format
1153                        || gc == GeneralCategory::Unassigned
1154                        || gc == GeneralCategory::PrivateUse
1155                        || gc == GeneralCategory::Surrogate
1156                        || gc == GeneralCategory::LowercaseLetter
1157                        || gc == GeneralCategory::ModifierLetter
1158                        || gc == GeneralCategory::OtherLetter
1159                        || gc == GeneralCategory::TitlecaseLetter
1160                        || gc == GeneralCategory::UppercaseLetter
1161                        || gc == GeneralCategory::SpacingMark
1162                        || gc == GeneralCategory::EnclosingMark
1163                        || gc == GeneralCategory::NonspacingMark)
1164                }
1165                GlyphOrigin::Direct => false,
1166            }
1167        } else {
1168            true
1169        };
1170
1171        let (syllable, syllable_type) = &mut syllables[i];
1172        if let Err(err) = shape_syllable(
1173            dotted_circle_index,
1174            &shaping_data,
1175            syllable,
1176            syllable_type,
1177            is_first_syllable,
1178        ) {
1179            debug!("gsub apply indic: {}", err);
1180        }
1181    }
1182
1183    *glyphs = syllables
1184        .into_iter()
1185        .flat_map(|(s, _)| s.into_iter())
1186        .map(from_raw_glyph_indic)
1187        .collect();
1188
1189    Ok(())
1190}
1191
1192fn shape_syllable(
1193    dotted_circle_index: u16,
1194    shaping_data: &IndicShapingData<'_>,
1195    syllable: &mut Vec<RawGlyphIndic>,
1196    syllable_type: &Option<Syllable>,
1197    is_first_syllable: bool,
1198) -> Result<(), ShapingError> {
1199    // Add a dotted circle to broken syllables so they can be treated
1200    // like standalone syllables
1201    // https://github.com/n8willis/opentype-shaping-documents/issues/45
1202    if let Some(Syllable::Broken) = syllable_type {
1203        insert_dotted_circle(dotted_circle_index, shaping_data.script, syllable)?;
1204    }
1205
1206    match syllable_type {
1207        // HarfBuzz treats vowel and standalone syllables like consonant
1208        // syllables. We follow suit
1209        // https://github.com/n8willis/opentype-shaping-documents/issues/45
1210        Some(Syllable::Consonant)
1211        | Some(Syllable::Vowel)
1212        | Some(Syllable::Standalone)
1213        | Some(Syllable::Broken) => {
1214            initial_reorder_consonant_syllable(shaping_data, syllable)?;
1215            apply_basic_features(shaping_data, syllable)?;
1216            final_reorder_consonant_syllable(shaping_data, syllable);
1217            apply_presentation_features(shaping_data, is_first_syllable, syllable)?;
1218        }
1219        Some(Syllable::Symbol) | None => {}
1220    }
1221
1222    Ok(())
1223}
1224
1225/// https://github.com/n8willis/opentype-shaping-documents/issues/45
1226fn insert_dotted_circle(
1227    dotted_circle_index: u16,
1228    script: Script,
1229    glyphs: &mut Vec<RawGlyphIndic>,
1230) -> Result<(), IndicError> {
1231    if dotted_circle_index == 0 {
1232        return Err(IndicError::MissingDottedCircle);
1233    }
1234
1235    let dotted_circle = RawGlyphIndic {
1236        unicodes: tiny_vec![[char; 1] => DOTTED_CIRCLE],
1237        glyph_index: dotted_circle_index,
1238        liga_component_pos: 0,
1239        glyph_origin: GlyphOrigin::Char(DOTTED_CIRCLE),
1240        flags: RawGlyphFlags::empty(),
1241        variation: None,
1242        extra_data: IndicData {
1243            pos: None,
1244            mask: FeatureMask::empty(),
1245        },
1246    };
1247
1248    let mut pos = 0;
1249    if let (Script::Malayalam, Some(glyph)) = (script, glyphs.first()) {
1250        // Insert dotted circle after possible "Repha"
1251        if glyph.is(repha) {
1252            pos = 1;
1253        }
1254    }
1255    glyphs.insert(pos, dotted_circle);
1256
1257    Ok(())
1258}
1259
1260/// Maps an Indic1 script tag to its corresponding `Script` variant.
1261fn script(indic1_tag: u32) -> Script {
1262    match indic1_tag {
1263        tag::DEVA => Script::Devanagari,
1264        tag::BENG => Script::Bengali,
1265        tag::GURU => Script::Gurmukhi,
1266        tag::GUJR => Script::Gujarati,
1267        tag::ORYA => Script::Oriya,
1268        tag::TAML => Script::Tamil,
1269        tag::TELU => Script::Telugu,
1270        tag::KNDA => Script::Kannada,
1271        tag::MLYM => Script::Malayalam,
1272        tag::SINH => Script::Sinhala,
1273        _ => panic!("Expected an Indic1 script tag"),
1274    }
1275}
1276
1277/// Maps an Indic1 script tag to its corresponding Indic2 script tag.
1278pub fn indic2_tag(indic1_tag: u32) -> u32 {
1279    match indic1_tag {
1280        tag::DEVA => tag::DEV2,
1281        tag::BENG => tag::BNG2,
1282        tag::GURU => tag::GUR2,
1283        tag::GUJR => tag::GJR2,
1284        tag::ORYA => tag::ORY2,
1285        tag::TAML => tag::TML2,
1286        tag::TELU => tag::TEL2,
1287        tag::KNDA => tag::KND2,
1288        tag::MLYM => tag::MLM2,
1289        tag::SINH => tag::SINH, // For simplicity, just return the Indic1 Sinhala tag
1290        _ => panic!("Expected an Indic1 script tag"),
1291    }
1292}
1293
1294/// Splits the input glyph buffer and collects it into a vector of Indic syllables.
1295fn to_indic_syllables(mut glyphs: &[RawGlyph<()>]) -> Vec<(Vec<RawGlyphIndic>, Option<Syllable>)> {
1296    let mut syllables: Vec<(Vec<RawGlyphIndic>, Option<Syllable>)> = Vec::new();
1297
1298    while !glyphs.is_empty() {
1299        let len = match match_syllable(glyphs) {
1300            Some((len, syllable_type)) => {
1301                assert_ne!(len, 0);
1302
1303                let syllable = glyphs[..len].iter().map(to_raw_glyph_indic).collect();
1304                syllables.push((syllable, Some(syllable_type)));
1305
1306                len
1307            }
1308            None => {
1309                let invalid_glyph = to_raw_glyph_indic(&glyphs[0]);
1310                match syllables.last_mut() {
1311                    // If the last syllable in `syllables` is invalid, just append
1312                    // this invalid glyph to that syllable
1313                    Some((invalid_syllable, None)) => invalid_syllable.push(invalid_glyph),
1314                    // Collect invalid glyphs
1315                    _ => syllables.push((vec![invalid_glyph], None)),
1316                }
1317
1318                1
1319            }
1320        };
1321
1322        glyphs = &glyphs[len..];
1323    }
1324
1325    syllables
1326}
1327
1328/////////////////////////////////////////////////////////////////////////////
1329// Initial reordering
1330/////////////////////////////////////////////////////////////////////////////
1331
1332fn initial_reorder_consonant_syllable(
1333    shaping_data: &IndicShapingData<'_>,
1334    glyphs: &mut [RawGlyphIndic],
1335) -> Result<(), ShapingError> {
1336    // 2.1 Base consonant
1337    if let Some(base_index) = tag_consonants(shaping_data, glyphs)? {
1338        initial_reorder_consonant_syllable_with_base(shaping_data, base_index, glyphs)
1339    } else {
1340        initial_reorder_consonant_syllable_without_base(glyphs)
1341    }
1342}
1343
/// Initial reordering for a syllable in which a base consonant was found.
///
/// `base_index` is the index of the base consonant in `glyphs` on entry. The
/// function tags every remaining untagged glyph with a `Pos`, sorts the
/// syllable by those tags, applies the Indic1-specific "Halant" move, and
/// finally sets the basic feature masks (RPHF, HALF, BLWF, PSTF, PREF) on the
/// glyphs.
///
/// # Errors
///
/// * `IndicError::MissingTags` if any glyph is still untagged after the
///   tagging steps.
/// * `IndicError::MissingBaseConsonant` if no glyph is tagged
///   `Pos::SyllableBase` after sorting.
/// * Any error returned by `feature_would_apply`.
///
/// # Panics
///
/// Panics if one of the internal tagging assertions fails
/// (`replace_none_pos` on an already tagged glyph, or an untagged glyph where
/// a tagged one is expected).
fn initial_reorder_consonant_syllable_with_base(
    shaping_data: &IndicShapingData<'_>,
    base_index: usize,
    glyphs: &mut [RawGlyphIndic],
) -> Result<(), ShapingError> {
    // 2.2 Matra decomposition
    // IMPLEMENTATION: Handled in the text preprocessing stage.

    // 2.3 Tag decomposed matras
    let glyphs_without_pos = glyphs.iter_mut().filter(|g| g.pos().is_none());
    for glyph in glyphs_without_pos {
        if let GlyphOrigin::Char(c) = glyph.glyph_origin {
            let pos = matra_pos(c, shaping_data.script);
            glyph.replace_none_pos(pos);
        }
    }

    // 2.4 Adjacent marks
    // IMPLEMENTATION: Handled in the text preprocessing stage.

    // 2.5 Pre-base consonants
    // 2.6 Reph
    // 2.7 Post-base consonants
    // IMPLEMENTATION: Handled in 2.1

    // 2.8 Mark tagging

    // Characters whose shaping class is tagged with POS_SMVD in 2.8.1.
    fn smvd_mark(c: char) -> bool {
        match shaping_class(c) {
            Some(ShapingClass::Bindu)
            | Some(ShapingClass::Visarga)
            | Some(ShapingClass::Avagraha)
            | Some(ShapingClass::Cantillation)
            | Some(ShapingClass::SyllableModifier)
            | Some(ShapingClass::GeminationMark)
            | Some(ShapingClass::Symbol) => true,
            _ => false,
        }
    }

    // Characters that inherit the tag of a neighbouring glyph in 2.8.2 and
    // 2.8.4. Includes joiners and non-joiners.
    fn remaining_mark(c: char) -> bool {
        match shaping_class(c) {
            Some(ShapingClass::Nukta)
            | Some(ShapingClass::Virama)
            | Some(ShapingClass::PureKiller)
            | Some(ShapingClass::Joiner)
            | Some(ShapingClass::NonJoiner) => true,
            _ => false,
        }
    }

    // 2.8.1 Marks in the BINDU, VISARGA, AVAGRAHA, CANTILLATION, SYLLABLE_MODIFIER,
    // GEMINATION_MARK, and SYMBOL categories should be tagged with POS_SMVD.
    let glyphs_smvd = glyphs.iter_mut().filter(|g| g.is(smvd_mark));
    for glyph in glyphs_smvd {
        let pos = match glyph.glyph_origin {
            // Oriya's "Candrabindu" must be tagged with POS_BEFORE_SUBJOINED
            GlyphOrigin::Char('\u{0B01}') => Pos::BeforeSubjoined,
            _ => Pos::SMVD,
        };
        glyph.replace_none_pos(Some(pos));
    }

    // 2.8.2 All remaining marks must be tagged with the same positioning tag as the
    // closest non-mark character the mark has affinity with, so that they move
    // together during the sorting step.
    //
    // NOTE: In this step, joiner and non-joiner characters must also be tagged
    // according to the same rules given for marks, even though these characters
    // are not categorized as marks in Unicode.
    //
    // `prev_pos` tracks the tag of the most recent glyph that is neither a
    // remaining mark (once a tag has been seen) nor an SMVD mark.
    let mut prev_pos = None;
    for i in 0..glyphs.len() {
        if glyphs[i].is(remaining_mark) && prev_pos.is_some() {
            // HarfBuzz and Uniscribe do not move a "Halant" if it follows
            // a pre-base matra
            //
            // https://github.com/n8willis/opentype-shaping-documents/issues/63
            if glyphs[i].is(halant) && prev_pos == Some(Pos::PrebaseMatra) {
                // Walk backwards to the nearest tag that is not a pre-base matra.
                let first_non_matra_pos = glyphs[..i]
                    .iter()
                    .rev()
                    .filter_map(RawGlyphIndic::pos)
                    .find(|pos| *pos != Pos::PrebaseMatra);

                // If there is none, the "Halant" stays untagged here.
                if first_non_matra_pos.is_some() {
                    glyphs[i].replace_none_pos(first_non_matra_pos);
                }
            } else {
                glyphs[i].replace_none_pos(prev_pos);
            }
        } else if !glyphs[i].is(smvd_mark) {
            assert_ne!(glyphs[i].pos(), None);
            prev_pos = glyphs[i].pos();
        }
    }

    // 2.8.3 For all marks preceding the base consonant, the mark must be tagged
    // with the same positioning tag as the closest preceding non-mark consonant.
    //
    // IMPLEMENTATION: Already tagged in 2.8.2

    // 2.8.4 For all marks occurring after the base consonant, the mark must be tagged
    // with the same positioning tag as the closest subsequent consonant.
    //
    // NOTE: In this step, joiner and non-joiner characters must also be tagged
    // according to the same rules given for marks, even though these characters
    // are not categorized as marks in Unicode.
    //
    // Iterating in reverse means `next_pos` always holds the tag of the closest
    // consonant that follows the current glyph.
    let mut next_pos = None;
    for glyph in glyphs[(base_index + 1)..].iter_mut().rev() {
        if glyph.is(remaining_mark) && next_pos.is_some() {
            // No assertion, as some marks may have already been tagged
            // in 2.8.2. Overwrite instead
            glyph.set_pos(next_pos);
        } else if glyph.is(effectively_consonant) {
            assert_ne!(glyph.pos(), None); // Consonant should be tagged by now
            next_pos = glyph.pos();
        }
    }

    // Check that no glyphs have been left untagged, then reorder glyphs
    // to canonical order
    if glyphs.iter().any(|g| g.pos().is_none()) {
        return Err(IndicError::MissingTags.into());
    } else {
        // `sort_by_key` is a stable sort, so glyphs sharing a tag keep their
        // relative order.
        glyphs.sort_by_key(|g| g.pos());
    }

    // Get base consonant position again, after reorder
    let base_index = glyphs
        .iter()
        .position(|g| g.has_pos(Pos::SyllableBase))
        .ok_or_else::<ShapingError, _>(|| IndicError::MissingBaseConsonant.into())?;

    // Handle Indic1 script tags. Move the first post-base "Halant" after the last
    // post-base consonant
    if shaping_data.shaping_model == ShapingModel::Indic1 {
        // Indices below are relative to this post-base sub-slice.
        let glyphs_post_base = &mut glyphs[(base_index + 1)..];
        let first_halant = glyphs_post_base.iter().position(|g| g.is(halant));
        let last_consonant = glyphs_post_base
            .iter()
            .rposition(|g| g.is(effectively_consonant));

        if let (Some(first_halant), Some(last_consonant)) = (first_halant, last_consonant) {
            // The comments in HarfBuzz state that for _some_ scripts, Uniscribe
            // does not move the "Halant" if a "Halant" already follows the last
            // post-base consonant. Kannada is one such script
            //
            // https://github.com/n8willis/opentype-shaping-documents/issues/64
            if shaping_data.script == Script::Kannada {
                let has_halant_after_last_consonant = glyphs_post_base[(last_consonant + 1)..]
                    .iter()
                    .rev()
                    .any(|g| g.is(halant));

                if !has_halant_after_last_consonant {
                    move_element(glyphs_post_base, first_halant, last_consonant);
                }
            } else {
                move_element(glyphs_post_base, first_halant, last_consonant);
            }
        }
    }

    // Set the appropriate feature masks
    for glyph in glyphs.iter_mut() {
        let mask = match glyph.pos() {
            Some(Pos::RaToBecomeReph) => BasicFeature::Rphf.mask(),
            Some(Pos::PrebaseConsonant) => {
                // Pre-base consonants additionally get BLWF only for Indic2
                // scripts whose BLWF mode is `PreAndPost`.
                if shaping_data.shaping_model != ShapingModel::Indic1
                    && shaping_data.script.blwf_mode() == BlwfMode::PreAndPost
                {
                    BasicFeature::Half.mask() | BasicFeature::Blwf.mask()
                } else {
                    BasicFeature::Half.mask()
                }
            }
            Some(Pos::BelowbaseConsonant) => BasicFeature::Blwf.mask(),
            Some(Pos::PostbaseConsonant) => BasicFeature::Pstf.mask(),
            _ => FeatureMask::empty(),
        };

        glyph.add_mask(mask);
    }

    // Remove BLWF mask from pre-base sequences that end with "Halant, ZWJ"
    // There is reason to believe that Uniscribe does this.
    //
    // Example, using Noto Sans/Serif Bengali:
    //   [Ka, Halant, Ba, Halant, Ba, Halant, Ka (Base)]
    //     * [Ka, Halant] takes on half form
    //     * [Ba, Halant]s take on subjoined form
    //   [Ka, Halant, Ba, Halant, Ba, Halant, ZWJ, Ka (Base)]
    //     * [Ka, Halant] takes on half form
    //     * [Ba, Halant]s take on half form in Uniscribe
    let last_explicit_half_form_index = glyphs[..base_index]
        .windows(2)
        .rposition(|gs| gs[0].is(halant) && gs[1].is(zwj))
        .map(|i| i + 1); // ZWJ index

    if let Some(last_explicit_half_form_index) = last_explicit_half_form_index {
        // Clears BLWF on everything from the syllable start up to and
        // including that last ZWJ.
        glyphs[..=last_explicit_half_form_index]
            .iter_mut()
            .for_each(|g| g.remove_mask(BasicFeature::Blwf.mask()));
    }

    // ...except non-initial, pre-base "Ra, Halant" sequences in Devanagari
    // This is to allow the application of the VATU feature
    //
    // https://github.com/n8willis/opentype-shaping-documents/issues/65
    if shaping_data.shaping_model == ShapingModel::Indic1
        && shaping_data.script == Script::Devanagari
    {
        // Collect all pre-base "Ra" indices
        //
        // IMPLEMENTATION: Includes possible "Reph", but because
        // RPHF is applied before BLWF, it shouldn't matter
        //
        // The windows run up to and including the base, so the third glyph of
        // a window may be the base consonant itself.
        let mut ra_indices = Vec::new();
        let mut iter = glyphs[..(base_index + 1)].windows(3).enumerate();
        while let Some((i, [g0, g1, g2])) = iter.next() {
            if g0.is(ra) && g1.is(halant) && !g2.is(zwj) {
                ra_indices.push(i)
            }
        }

        // Mask both the "Ra" and the "Halant" that follows it.
        let mask = BasicFeature::Blwf.mask();
        for i in ra_indices {
            glyphs[i].add_mask(mask);
            glyphs[i + 1].add_mask(mask);
        }
    }

    // Add PREF mask to pre-base-reordering "Ra" sequences in Malayalam/Telugu
    if shaping_data.script == Script::Malayalam || shaping_data.script == Script::Telugu {
        let glyphs_post_base = &mut glyphs[(base_index + 1)..];

        // Find the first occurrence of pre-base-reordering "Ra".
        // Only one can exist per syllable
        //
        // The pair is searched for as "Ra, Halant" under Indic1 and as
        // "Halant, Ra" under Indic2.
        let prebase_reordering_ra_index = match shaping_data.shaping_model {
            ShapingModel::Indic1 => glyphs_post_base
                .windows(2)
                .position(|gs| gs[0].is(ra) && gs[1].is(halant)),
            ShapingModel::Indic2 => glyphs_post_base
                .windows(2)
                .position(|gs| gs[0].is(halant) && gs[1].is(ra)),
        };

        if let Some(prebase_reordering_ra_index) = prebase_reordering_ra_index {
            // Only mask the pair if the font's PREF feature would apply to it.
            if shaping_data.feature_would_apply(
                BasicFeature::Pref.tag(),
                glyphs_post_base,
                prebase_reordering_ra_index,
            )? {
                let mask = BasicFeature::Pref.mask();
                glyphs_post_base[prebase_reordering_ra_index].add_mask(mask);
                glyphs_post_base[prebase_reordering_ra_index + 1].add_mask(mask);
            }
        }
    }

    Ok(())
}
1604
1605/// Handle consonant glyphs that lack a base consonant. Mimics Uniscribe's
1606/// behaviour.
1607///
1608/// Some examples to illustrate how Uniscribe's behaviour differs from HarfBuzz's:
1609///
1610/// ```text
1611///            Font: Noto Sans Bengali
1612///                  (or any Indic2 font with a Reph, subjoined, and half forms).
1613///
1614///
1615/// Test sequence 1: [Ka, Halant, Ba, Halant, ZWJ]
1616///
1617///        HarfBuzz: [Ka, Halant, Ba+Halant (BLWF), ZWJ]
1618///                  HB terminates the base consonant search on [Halant, ZWJ]. No
1619///                  base is found, and by default HB considers all consonants
1620///                  pre-base. `bng2` has the `BLWF_MODE_PRE_AND_POST` characteristic,
1621///                  therefore pre-base [Ba, Halant] takes on a subjoined form.
1622///
1623///       Uniscribe: [Ka+Halant (HALF), Ba+Halant+ZWJ (HALF)]
1624///                  Uniscribe appears to terminate the base consonant search
1625///                  too, but only applies the HALF feature to the syllable.
1626///
1627///
1628/// Test sequence 2: [Ra, Halant, Ba, Halant, ZWJ]
1629///
1630///        HarfBuzz: [Ra+Halant+Ba (BLWF+(CJCT|PRES)), Halant, ZWJ]
1631///                  On encountering a possible Reph, HB marks the Ra as a
1632///                  possible base (in the event that Ra is the only consonant).
1633///                  Base consonant search terminates on [Halant, ZWJ]. Ra
1634///                  remains the base; therefore post-base [Halant, Ba] takes
1635///                  on a subjoined form.
1636///
1637///       Uniscribe: [Ra+Halant (RPHF), Ba+Halant+ZWJ (HALF)]
1638///                  Uniscribe chooses to shape the Reph, and positions it
1639///                  on the Ba half form.
1640/// ```
1641fn initial_reorder_consonant_syllable_without_base(
1642    glyphs: &mut [RawGlyphIndic],
1643) -> Result<(), ShapingError> {
1644    // IMPLEMENTATION: Considering the analysis above:
1645    //
1646    // No reordering is necessary, therefore the only glyph that requires
1647    // a `Pos` tag is the syllable-initial Ra iff it is to form a Reph.
1648    // This is taken care of in `tag_consonants`.
1649    //
1650    // NOTE: Our GSUB implementation is such that the remaining glyphs
1651    // that constitute the Reph do not need to be tagged or masked.
1652    for glyph in glyphs.iter_mut() {
1653        let mask = match glyph.pos() {
1654            Some(Pos::RaToBecomeReph) => BasicFeature::Rphf.mask(),
1655            _ => BasicFeature::Half.mask(),
1656        };
1657
1658        glyph.add_mask(mask);
1659    }
1660
1661    Ok(())
1662}
1663
1664/// Assign `Pos` tags to consonants in a syllable. Return the index of the base consonant, or `None`
1665/// if base consonant does not exist.
1666fn tag_consonants(
1667    shaping_data: &IndicShapingData<'_>,
1668    glyphs: &mut [RawGlyphIndic],
1669) -> Result<Option<usize>, ShapingError> {
1670    let has_reph = has_reph(shaping_data, glyphs)?;
1671    let start_prebase_index;
1672    if has_reph {
1673        start_prebase_index = match shaping_data.script.reph_mode() {
1674            RephMode::Implicit => 2,
1675            RephMode::Explicit => 3,
1676            RephMode::LogicalRepha => 1,
1677        };
1678        glyphs[0].replace_none_pos(Some(Pos::RaToBecomeReph));
1679    } else {
1680        start_prebase_index = 0;
1681    };
1682
1683    let base_index = match shaping_data.script.base_consonant_pos() {
1684        BasePos::Last => {
1685            tag_postbase_consonants(shaping_data, start_prebase_index, has_reph, glyphs)
1686        }
1687        BasePos::LastSinhala => tag_postbase_consonants_sinhala(start_prebase_index, glyphs),
1688    }?;
1689
1690    if shaping_data.script == Script::Gurmukhi {
1691        tag_consonant_medials(glyphs);
1692    }
1693
1694    // Tag base and pre-base consonants.
1695    if let Some(base_index) = base_index {
1696        // No untagged assertion, as this potentially replaces `Pos::RaToBecomeReph`.
1697        glyphs[base_index].set_pos(Some(Pos::SyllableBase));
1698        if start_prebase_index < base_index {
1699            glyphs[start_prebase_index..base_index]
1700                .iter_mut()
1701                .filter(|g| g.is(effectively_consonant))
1702                .for_each(|g| g.replace_none_pos(Some(Pos::PrebaseConsonant)));
1703        }
1704    }
1705
1706    Ok(base_index)
1707}
1708
1709/// Assign `Pos` tags to post-base consonants (non-Sinhala scripts). Return the index of the base
1710/// consonant, or `None` if base consonant does not exist.
1711fn tag_postbase_consonants(
1712    shaping_data: &IndicShapingData<'_>,
1713    start_prebase_index: usize,
1714    has_reph: bool,
1715    glyphs: &mut [RawGlyphIndic],
1716) -> Result<Option<usize>, ShapingError> {
1717    let mut base_index = if has_reph {
1718        match shaping_data.script.reph_mode() {
1719            // "Ra" is still a base candidate if it is the only consonant in the syllable.
1720            RephMode::Implicit => Some(0),
1721            // "Ra" is never a base candidate, as "Reph" is always formed. (HarfBuzz, Uniscribe and
1722            // CoreText take this approach with Sinhala. Not sure about Telugu.)
1723            // https://github.com/n8willis/opentype-shaping-documents/issues/81.
1724            RephMode::Explicit => None,
1725            // "Repha" is not a consonant.
1726            RephMode::LogicalRepha => None,
1727        }
1728    } else {
1729        None
1730    };
1731    let mut i = glyphs.len() - 1;
1732    let mut seen_belowbase = false;
1733
1734    while i >= start_prebase_index {
1735        if i == start_prebase_index {
1736            if glyphs[i].is(effectively_consonant) {
1737                base_index = Some(i);
1738            }
1739            break;
1740        }
1741
1742        let j = i - 1;
1743        if glyphs[i].is(effectively_consonant) {
1744            if !glyphs[j].is(halant) {
1745                base_index = Some(i);
1746                break;
1747            }
1748
1749            // HACK: Reorder "Halant, Consonant" to "Consonant, Halant" for Indic1 compatibility.
1750            if shaping_data.shaping_model == ShapingModel::Indic1 {
1751                glyphs.swap(i, j);
1752            }
1753
1754            let pos = postbase_tag(shaping_data, seen_belowbase, glyphs, j)?;
1755
1756            // HACK: Undo the reorder.
1757            if shaping_data.shaping_model == ShapingModel::Indic1 {
1758                glyphs.swap(i, j);
1759            }
1760
1761            // A consonant cannot be base if it has a {below, post, pre}-base reordering form.
1762            if let Some(pos) = pos {
1763                glyphs[i].replace_none_pos(Some(pos));
1764                if pos == Pos::BelowbaseConsonant {
1765                    seen_belowbase = true;
1766                }
1767                i -= 2;
1768            } else {
1769                base_index = Some(i);
1770                break;
1771            }
1772        } else if glyphs[i].is(zwj) && glyphs[j].is(halant) {
1773            // Terminate base search on "Halant, ZWJ". Mimics HarfBuzz (and possibly Uniscribe).
1774            base_index = None;
1775            break;
1776        } else {
1777            i -= 1;
1778        }
1779    }
1780
1781    Ok(base_index)
1782}
1783
1784/// Assign `Pos` tags to post-base consonants (Sinhala). Return the index of the base consonant, or
1785/// `None` if base consonant does not exist.
1786fn tag_postbase_consonants_sinhala(
1787    start_prebase_index: usize,
1788    glyphs: &mut [RawGlyphIndic],
1789) -> Result<Option<usize>, ShapingError> {
1790    let mut base_index = None; // Sinhala is `RephMode:: Explicit`, so this is always `None`.
1791    let mut i = glyphs.len() - 1;
1792
1793    while i >= start_prebase_index {
1794        if i == start_prebase_index {
1795            if glyphs[i].is(effectively_consonant) {
1796                base_index = Some(i);
1797            }
1798            break;
1799        }
1800
1801        let j = i - 1;
1802        if glyphs[i].is(effectively_consonant) {
1803            // A consonant cannot be base if it is preceded by a "ZWJ". (In Sinhala text, this
1804            // sequence is used to specify the subjoined form of said consonant.)
1805            if glyphs[j].is(zwj) {
1806                glyphs[i].replace_none_pos(Some(Pos::BelowbaseConsonant));
1807            } else {
1808                base_index = Some(i);
1809                break;
1810            }
1811        }
1812
1813        i -= 1;
1814    }
1815
1816    Ok(base_index)
1817}
1818
1819/// Return a `Pos` tag for a (possible) postbase consonant.
1820///
1821/// https://github.com/n8willis/opentype-shaping-documents/issues/66
1822fn postbase_tag(
1823    shaping_data: &IndicShapingData<'_>,
1824    seen_belowbase: bool,
1825    glyphs: &[RawGlyphIndic],
1826    start_index: usize,
1827) -> Result<Option<Pos>, ShapingError> {
1828    const FEATURE_POS_PAIRS: &[(BasicFeature, Pos)] = &[
1829        (BasicFeature::Blwf, Pos::BelowbaseConsonant),
1830        (BasicFeature::Pstf, Pos::PostbaseConsonant),
1831        (BasicFeature::Pref, Pos::PostbaseConsonant),
1832    ];
1833
1834    let applicable_feature_pos_pairs = if seen_belowbase {
1835        // Post-base and pre-base-reordering forms must follow below-base forms
1836        &FEATURE_POS_PAIRS[..1]
1837    } else {
1838        // Pre-base reordering forms only occur in Malayalam and Telugu scripts
1839        match shaping_data.script {
1840            Script::Malayalam | Script::Telugu => FEATURE_POS_PAIRS,
1841            _ => &FEATURE_POS_PAIRS[..2],
1842        }
1843    };
1844
1845    for (basic_feature, pos) in applicable_feature_pos_pairs {
1846        if shaping_data.feature_would_apply(basic_feature.tag(), glyphs, start_index)? {
1847            return Ok(Some(*pos));
1848        }
1849    }
1850
1851    Ok(None)
1852}
1853
1854/// Tag the only Indic consonant medial, Gurmukhi Yakash U+0A75, with
1855/// `Pos::BelowbaseConsonant`.
1856///
1857/// https://github.com/n8willis/opentype-shaping-documents/issues/67
1858fn tag_consonant_medials(glyphs: &mut [RawGlyphIndic]) {
1859    glyphs
1860        .iter_mut()
1861        .filter(|g| g.is(consonant_medial))
1862        .for_each(|g| g.replace_none_pos(Some(Pos::BelowbaseConsonant)))
1863}
1864
1865/// For `RephMode::Implicit` and `RephMode::Explicit` scripts, check if the RPHF feature would
1866/// apply. For `RephMode::LogicalRepha` scripts, check for the existence of a syllable-initial
1867/// "Repha" code point.
1868fn has_reph(
1869    shaping_data: &IndicShapingData<'_>,
1870    glyphs: &[RawGlyphIndic],
1871) -> Result<bool, ShapingError> {
1872    match shaping_data.script.reph_mode() {
1873        RephMode::Implicit => match glyphs.get(..3) {
1874            // A "ZWJ" (or "ZWNJ") after a syllable-initial "Ra, Halant" inhibits "Reph" formation.
1875            Some([g0, g1, g2]) if g0.is(ra) && g1.is(halant) && !g2.is(joiner) => shaping_data
1876                .feature_would_apply(BasicFeature::Rphf.tag(), glyphs, 0)
1877                .map_err(|e| e.into()),
1878            Some(_) | None => Ok(false),
1879        },
1880        RephMode::Explicit => match glyphs.get(..3) {
1881            Some([g0, g1, g2]) if g0.is(ra) && g1.is(halant) && g2.is(zwj) => shaping_data
1882                .feature_would_apply(BasicFeature::Rphf.tag(), glyphs, 0)
1883                .map_err(|e| e.into()),
1884            Some(_) | None => Ok(false),
1885        },
1886        RephMode::LogicalRepha => glyphs
1887            .first()
1888            .map(|g| g.is(repha))
1889            .ok_or_else(|| IndicError::EmptyBuffer.into()),
1890    }
1891}
1892
1893/// Return the final sort-order position of a matra.
1894///
1895/// Return `None` if the input character:
1896///   * is not a matra.
1897///   * is a non-decomposable, multi-part matra (unless specially handled).
1898fn matra_pos(c: char, script: Script) -> Option<Pos> {
1899    // Handle multi-part matras that lack a canonical Unicode decomposition
1900    // https://github.com/n8willis/opentype-shaping-documents/issues/62
1901    match c {
1902        '\u{0AC9}' => return Some(Pos::AfterPost), // Gujarati "Sign Candra O"
1903        '\u{0B57}' => return Some(Pos::AfterPost), // Oriya "Au Length Mark"
1904        _ => {}
1905    }
1906
1907    match indic_character(c) {
1908        (Some(ShapingClass::VowelDependent), Some(mark_placement)) => match mark_placement {
1909            MarkPlacementSubclass::TopPosition => script.abovebase_matra_pos(),
1910            MarkPlacementSubclass::RightPosition => script.rightside_matra_pos(c),
1911            MarkPlacementSubclass::BottomPosition => Some(script.belowbase_matra_pos()),
1912            MarkPlacementSubclass::LeftPosition => Some(Pos::PrebaseMatra),
1913            _ => None,
1914        },
1915        _ => None,
1916    }
1917}
1918
1919/////////////////////////////////////////////////////////////////////////////
1920// Basic substitution features
1921/////////////////////////////////////////////////////////////////////////////
1922
1923/// Applies Indic basic features in their required order
1924fn apply_basic_features(
1925    shaping_data: &IndicShapingData<'_>,
1926    glyphs: &mut Vec<RawGlyphIndic>,
1927) -> Result<(), ParseError> {
1928    for feature in BasicFeature::ALL {
1929        let index = shaping_data.get_lookups_cache_index(feature.mask())?;
1930        let lookups = &shaping_data.gsub_cache.cached_lookups.borrow()[index];
1931
1932        for &(lookup_index, feature_tag) in lookups {
1933            shaping_data.apply_lookup(lookup_index, feature_tag, glyphs, |g| {
1934                feature.is_global() || g.has_mask(feature.mask())
1935            })?;
1936        }
1937    }
1938
1939    Ok(())
1940}
1941
1942/////////////////////////////////////////////////////////////////////////////
1943// Final reordering
1944/////////////////////////////////////////////////////////////////////////////
1945
1946fn final_reorder_consonant_syllable(
1947    shaping_data: &IndicShapingData<'_>,
1948    glyphs: &mut [RawGlyphIndic],
1949) {
1950    // 4.1 Base consonant
1951    let mut opt_base_index = glyphs.iter().position(|g| g.has_pos(Pos::SyllableBase));
1952
1953    // Finding the base consonant in Malayalam appears to require special treatment.
1954    // If there exists below-base consonants after the original base consonant that
1955    // haven't taken on subjoined form, the last of these below-base consonants is
1956    // the new base.
1957    //
1958    // Example, using the Nirmala font:
1959    //                 Syllable: [Ka, Halant, Tta, Halant, Na, Sign E]
1960    //
1961    //    After initial reorder: [Sign E, Ka, Halant, Tta, Halant, Na]
1962    //                           Ka is base, [Halant, Tta] and [Halant, Na] are marked below-base,
1963    //                           but both do not take on subjoined form.
1964    //
1965    //   HarfBuzz and Uniscribe: [Ka, Halant, Tta, Halant, Sign E, Na]
1966    //                           The Sign E matra is moved to before the Na, as it is the new base.
1967    //
1968    // IMPLEMENTATION: If a new base is found, the new `base_index` and the glyph
1969    // marked `Pos::SyllableBase` will be misaligned, but at this stage it shouldn't
1970    // matter.
1971    if let (Script::Malayalam, Some(base_index)) = (shaping_data.script, opt_base_index) {
1972        let start = base_index + 1;
1973        opt_base_index = glyphs[start..]
1974            .iter()
1975            .rposition(|g| g.is(effectively_consonant) && g.has_pos(Pos::BelowbaseConsonant))
1976            .map(|i| i + start)
1977            .or(opt_base_index);
1978    }
1979
1980    // 4.2 Pre-base matras
1981    if let Some(base_index) = opt_base_index {
1982        // Find the start index of a contiguous sequence of `Pos::PrebaseMatra` glyphs
1983        let first_prebase_matra_index = glyphs[..base_index]
1984            .iter()
1985            .position(|g| g.has_pos(Pos::PrebaseMatra));
1986
1987        // Find the end index of a contiguous sequence of `Pos::PrebaseMatra` glyphs
1988        let last_prebase_matra_index = glyphs[..base_index]
1989            .iter()
1990            .rposition(|g| g.has_pos(Pos::PrebaseMatra));
1991
1992        if let (Some(first_prebase_matra_index), Some(last_prebase_matra_index)) =
1993            (first_prebase_matra_index, last_prebase_matra_index)
1994        {
1995            // Find the new start index for this sequence
1996            if let Some(final_prebase_matra_index) = final_pre_base_matra_index(
1997                shaping_data.script,
1998                last_prebase_matra_index,
1999                base_index,
2000                glyphs,
2001            ) {
2002                // Move the sequence
2003                glyphs[first_prebase_matra_index..=final_prebase_matra_index]
2004                    .rotate_left(last_prebase_matra_index - first_prebase_matra_index + 1);
2005            }
2006        }
2007    }
2008
2009    // 4.3 Reph
2010    if let Some(final_reph_index) = final_reph_index(shaping_data.script, opt_base_index, glyphs) {
2011        move_element(glyphs, 0, final_reph_index);
2012
2013        // Get new base index if Reph moves after the base
2014        opt_base_index = opt_base_index.map(|b| if b <= final_reph_index { b - 1 } else { b });
2015    }
2016
2017    // 4.4 Pre-base-reordering consonants
2018    if let (Script::Malayalam, Some(base_index)) | (Script::Telugu, Some(base_index)) =
2019        (shaping_data.script, opt_base_index)
2020    {
2021        let mut pref_glyphs = glyphs
2022            .iter()
2023            .enumerate()
2024            .filter(|(_, g)| g.has_mask(BasicFeature::Pref.mask()));
2025        let pref_glyphs_count = pref_glyphs.clone().count();
2026
2027        // Check that only one glyph has the PREF feature
2028        if let (Some((reordering_ra_index, _)), 1) = (pref_glyphs.next(), pref_glyphs_count) {
2029            let final_reordering_ra_index =
2030                final_pre_base_reordering_consonant_index(shaping_data.script, base_index, glyphs);
2031
2032            move_element(glyphs, reordering_ra_index, final_reordering_ra_index);
2033        }
2034    }
2035
2036    // 4.5 Initial matras
2037    // IMPLEMENTATION: Handled in `apply_presentation_features`
2038}
2039
2040fn final_pre_base_matra_index(
2041    script: Script,
2042    last_prebase_matra_index: usize,
2043    base_index: usize,
2044    glyphs: &[RawGlyphIndic],
2045) -> Option<usize> {
2046    // Malayalam and Tamil do not have HALF forms or explicit "Halant" forms.
2047    // Malayalam typically uses the HALF feature for chillu substitutions, and it
2048    // appears that Tamil can use the HALF feature for forming _ligated_ explicit
2049    // "Halant" forms (the TAMu_Kalyani font does this).
2050    //
2051    // The pre-base matra should be positioned after these glyphs
2052
2053    // https://github.com/n8willis/opentype-shaping-documents/issues/68
2054    if script == Script::Malayalam || script == Script::Tamil {
2055        return Some(base_index - 1);
2056    }
2057
2058    // (1) The pre-base matra's final position is defined as: after the
2059    // last standalone "Halant" glyph that comes after the matra's starting
2060    // position and also comes before the main consonant
2061    //
2062    // (2) If a ZWJ or a ZWNJ follows this last standalone "Halant", the
2063    // final matra position is moved to after the joiner or non-joiner
2064    //
2065    // We don't follow (2). Instead, if a ZWJ follows this last standalone
2066    // "Halant", the final matra position should _not_ be after said "Halant"
2067    // https://github.com/n8willis/opentype-shaping-documents/issues/73
2068    //
2069    // IMPLEMENTATION: ZWNJ is taken care of by the syllable state machine.
2070    // "Halant, ZWNJ" is a terminating sequence for a consonant syllable; any
2071    // pre-base matras occurring after it belong to the subsequent syllable
2072    let start = last_prebase_matra_index + 1;
2073    glyphs[start..=base_index]
2074        .windows(2)
2075        .rposition(|gs| gs[0].is(halant) && !gs[1].is(zwj))
2076        .map(|i| i + start)
2077}
2078
2079// Variant of `final_pre_base_matra_index`. Differences:
2080//   * doesn't special-case Tamil, as the script has no pre-base-reordering consonants
2081//   * positions the pre-base-reordering consonant after a "Halant, ZWJ"
2082//     https://github.com/n8willis/opentype-shaping-documents/issues/73
2083//   * has a default position immediately before the base consonant
2084fn final_pre_base_reordering_consonant_index(
2085    script: Script,
2086    base_index: usize,
2087    glyphs: &[RawGlyphIndic],
2088) -> usize {
2089    if script == Script::Malayalam {
2090        return base_index;
2091    }
2092
2093    let mut iter = glyphs[..=base_index].windows(2).enumerate().rev();
2094    while let Some((i, [g0, g1])) = iter.next() {
2095        if g0.is(halant) {
2096            if g1.is(zwj) {
2097                return i + 2;
2098            }
2099            return i + 1;
2100        }
2101    }
2102
2103    base_index
2104}
2105
2106// At this stage, this step has become such a mish-mash of:
2107//   * the OpenType spec
2108//   * HarfBuzz's interpretation of the OpenType spec
2109//   * our spec
2110//   * comparison against CoreText's output
2111// that it really deserves to be called "Final Reph Pos As Decided by Adrian"
2112// https://github.com/n8willis/opentype-shaping-documents/issues/48
2113fn final_reph_index(
2114    script: Script,
2115    base_index: Option<usize>,
2116    glyphs: &[RawGlyphIndic],
2117) -> Option<usize> {
2118    // No "Reph", no problems
2119    if glyphs.len() < 2 || !glyphs[0].has_pos(Pos::RaToBecomeReph) {
2120        return None;
2121    }
2122
2123    let reph_characteristic = script.reph_position();
2124
2125    // This is "Reorder Reph" step 2/b in OpenType, which HarfBuzz implements
2126    // (and CoreText too, from empirical testing), but our spec doesn't.
2127    //
2128    // "If the "Reph" repositioning class is not after post-base: target position is after
2129    // the first explicit "Halant" glyph between the first post-reph consonant and last main
2130    // consonant. If "ZWJ" or "ZWNJ" are following this "Halant", position is moved after it.
2131    // If such position is found, this is the target position." ***
2132    // https://docs.microsoft.com/en-us/typography/script-development/devanagari#reorder-characters
2133    //
2134    // TEST: "Ra, Halant, Ra, Halant, Ya" using Noto Sans/Serif Devanagari.
2135    // Without this step, the "Reph" is positioned after the "Ya", when this step dictates that
2136    // it should move after the first explicit "Halant" between the "Reph" and base consonant
2137    //
2138    // *** There is evidence to believe that Uniscribe may still do this for the after post-base
2139    //     repositioning class, and HarfBuzz _definitely_ does it
2140    if let Some(base_index) = base_index {
2141        let start = 1;
2142        let mut iter = glyphs[start..=base_index].windows(2).enumerate();
2143        while let Some((i, [g0, g1])) = iter.next() {
2144            if g0.is(halant) {
2145                if g1.is(joiner) {
2146                    return Some(i + 1 + start);
2147                }
2148                return Some(i + start);
2149            }
2150        }
2151    }
2152
2153    // This is where things start getting even more fantastic.
2154    //
2155    // For scripts that have the REPH_POS_BEFORE_POST characteristic, OpenType "Reorder Reph" step 4/d
2156    // states:
2157    //
2158    // "If "Reph" should be positioned before post-base consonant, find first post-base classified
2159    // consonant not ligated with main. If no consonant is found, the target position should be
2160    // before the first matra, syllable modifier sign or vedic sign."
2161    //
2162    // Our spec imitates OpenType. However, it looks like HarfBuzz and CoreText don't, and instead
2163    // jump straight to step 5/e:
2164    //
2165    // "If no consonant is found in 3/c or 4/d, move "Reph" to a position immediately before
2166    // the first post-base matra, syllable modifier sign or vedic sign ***that has a reordering
2167    // class after the intended "Reph" position***. For example, if the reordering position for
2168    // "Reph" is post-main, it will skip above-base matras that also have a post-main position."
2169    //
2170    // TEST: "Ra, Halant, Ka, Sign Aa" using Noto Sans/Serif Devanagari.
2171    // HarfBuzz and CoreText have the "Reph" positioned after the "Sign Aa" (which is marked
2172    // Pos::AfterSubjoined). If we followed our spec/OpenType, it gets positioned after the "Ka"
2173
2174    // HarfBuzz **does** implement their interpretation of 4/d, but for whatever reason only applies
2175    // it to scripts that have the REPH_POS_AFTER_SUBJOINED characteristic.
2176    //
2177    // There is no explicit handling of REPH_POS_BEFORE_SUBJOINED in HarfBuzz
2178
2179    // Biting the bullet and making this change so as to be consistent with HarfBuzz and
2180    // Uniscribe's (Gujarati) output. Sorry CoreText!
2181    //
2182    // For scripts with the REPH_POS_BEFORE_POST characteristic, position the "Reph" after
2183    // ALL post-base matras
2184    let reordering_class = match reph_characteristic {
2185        Pos::BeforePost => Some(Pos::AfterPost),
2186        _ => Some(reph_characteristic),
2187    };
2188
2189    let new_index = glyphs
2190        .iter()
2191        .rposition(|g| g.pos() <= reordering_class)
2192        .unwrap_or(glyphs.len() - 1); // Fallback index == end of syllable
2193
2194    // This step doesn't appear to be covered in OpenType, but is implemented in HarfBuzz and
2195    // appears to be implemented in CoreText. From our spec:
2196    //
2197    // "Finally, if the final position of "Reph" occurs after a "matra, Halant" subsequence, then
2198    // "Reph" must be repositioned to the left of "Halant", to allow for potential matching with
2199    // abvs or psts substitutions from GSUB."
2200    //
2201    // Our spec applies this to all "Reph" characteristics except REPH_POS_BEFORE_POST.
2202    // TEST: "Ra, Halant, Ka, O, Halant" in Noto Sans/Serif Devanagari (Devanagari incorporates
2203    // the REPH_POS_BEFORE_POST characteristic)
2204    match (glyphs.get(new_index - 1), glyphs.get(new_index)) {
2205        (Some(g0), Some(g1)) if g0.is(matra) && g1.is(halant) => Some(new_index - 1),
2206        _ => Some(new_index),
2207    }
2208}
2209
2210/////////////////////////////////////////////////////////////////////////////
2211// Remaining substitution features
2212/////////////////////////////////////////////////////////////////////////////
2213
2214/// Apply remaining substitution features after final reordering.
2215///
2216/// If the syllable is the first in a word, applies the INIT feature.
2217///
2218/// The order in which the remaining features are applied should be in
2219/// the order in which they appear in the GSUB table.
2220fn apply_presentation_features(
2221    shaping_data: &IndicShapingData<'_>,
2222    is_first_syllable: bool,
2223    glyphs: &mut Vec<RawGlyphIndic>,
2224) -> Result<(), ParseError> {
2225    let mut features = FeatureMask::PRES
2226        | FeatureMask::ABVS
2227        | FeatureMask::BLWS
2228        | FeatureMask::PSTS
2229        | FeatureMask::HALN
2230        | FeatureMask::CALT;
2231
2232    if let Some(glyph) = glyphs.first_mut() {
2233        if is_first_syllable && glyph.has_pos(Pos::PrebaseMatra) {
2234            glyph.add_mask(FeatureMask::INIT);
2235            features |= FeatureMask::INIT;
2236        }
2237    }
2238    let index = shaping_data.get_lookups_cache_index(features)?;
2239    let lookups = &shaping_data.gsub_cache.cached_lookups.borrow()[index];
2240
2241    for &(lookup_index, feature_tag) in lookups {
2242        shaping_data.apply_lookup(lookup_index, feature_tag, glyphs, |g| {
2243            feature_tag != tag::INIT || g.has_mask(FeatureMask::INIT)
2244        })?;
2245    }
2246
2247    Ok(())
2248}
2249
2250/////////////////////////////////////////////////////////////////////////////
2251// Helper functions
2252/////////////////////////////////////////////////////////////////////////////
2253
2254fn to_raw_glyph_indic(glyph: &RawGlyph<()>) -> RawGlyphIndic {
2255    RawGlyphIndic {
2256        unicodes: glyph.unicodes.clone(),
2257        glyph_index: glyph.glyph_index,
2258        liga_component_pos: glyph.liga_component_pos,
2259        glyph_origin: glyph.glyph_origin,
2260        flags: glyph.flags,
2261        variation: glyph.variation,
2262        extra_data: IndicData {
2263            pos: None,
2264            mask: FeatureMask::empty(),
2265        },
2266    }
2267}
2268
2269fn from_raw_glyph_indic(glyph: RawGlyphIndic) -> RawGlyph<()> {
2270    RawGlyph {
2271        unicodes: glyph.unicodes,
2272        glyph_index: glyph.glyph_index,
2273        liga_component_pos: glyph.liga_component_pos,
2274        glyph_origin: glyph.glyph_origin,
2275        flags: glyph.flags,
2276        variation: glyph.variation,
2277        extra_data: (),
2278    }
2279}
2280
2281/// Checks if a character is effectively an Indic consonant.
2282///
2283/// Gurmukhi's two `ConsonantPlaceholder` characters "Iri" and "Ura" are
2284/// considered consonants.
2285///
2286/// Kannada's two `ConsonantWithStacker` characters "Jihvamuliya" and
2287/// "Upadhmaniya" are considered consonants.
2288///
2289/// Also, HarfBuzz treats dotted circles, placeholders, and independent
2290/// vowels as consonants. We follow suit.
2291fn effectively_consonant(c: char) -> bool {
2292    match shaping_class(c) {
2293        Some(ShapingClass::Consonant)
2294        | Some(ShapingClass::ConsonantDead)
2295        | Some(ShapingClass::ConsonantPlaceholder)
2296        | Some(ShapingClass::ConsonantWithStacker)
2297        | Some(ShapingClass::DottedCircle)
2298        | Some(ShapingClass::Number)
2299        | Some(ShapingClass::Placeholder)
2300        | Some(ShapingClass::VowelIndependent) => true,
2301        _ => false,
2302    }
2303}
2304
/// Move the element at index `from` to index `to`, shifting the elements in between by one
/// position towards `from`. Panics if either index is out of bounds.
fn move_element<T>(slice: &mut [T], from: usize, to: usize) {
    let forwards = from < to;
    let (low, high) = if forwards { (from, to) } else { (to, from) };

    // Only the elements between the two indices (inclusive) are affected.
    let window = &mut slice[low..=high];
    if forwards {
        window.rotate_left(1);
    } else {
        window.rotate_right(1);
    }
}
2312
2313/////////////////////////////////////////////////////////////////////////////
2314// Indic character tables
2315/////////////////////////////////////////////////////////////////////////////
2316
2317#[rustfmt::skip]
2318fn indic_character(ch: char) -> (Option<ShapingClass>, Option<MarkPlacementSubclass>) {
2319    use MarkPlacementSubclass::*;
2320    use ShapingClass::*;
2321
2322    match ch as u32 {
2323        // Devanagari character table
2324        0x0900 => (Some(Bindu), Some(TopPosition)),             // Inverted Candrabindu
2325        0x0901 => (Some(Bindu), Some(TopPosition)),             // Candrabindu
2326        0x0902 => (Some(Bindu), Some(TopPosition)),             // Anusvara
2327        0x0903 => (Some(Visarga), Some(RightPosition)),         // Visarga
2328        0x0904 => (Some(VowelIndependent), None),               // Short A
2329        0x0905 => (Some(VowelIndependent), None),               // A
2330        0x0906 => (Some(VowelIndependent), None),               // Aa
2331        0x0907 => (Some(VowelIndependent), None),               // I
2332        0x0908 => (Some(VowelIndependent), None),               // Ii
2333        0x0909 => (Some(VowelIndependent), None),               // U
2334        0x090A => (Some(VowelIndependent), None),               // Uu
2335        0x090B => (Some(VowelIndependent), None),               // Vocalic R
2336        0x090C => (Some(VowelIndependent), None),               // Vocalic L
2337        0x090D => (Some(VowelIndependent), None),               // Candra E
2338        0x090E => (Some(VowelIndependent), None),               // Short E
2339        0x090F => (Some(VowelIndependent), None),               // E
2340        0x0910 => (Some(VowelIndependent), None),               // Ai
2341        0x0911 => (Some(VowelIndependent), None),               // Candra O
2342        0x0912 => (Some(VowelIndependent), None),               // Short O
2343        0x0913 => (Some(VowelIndependent), None),               // O
2344        0x0914 => (Some(VowelIndependent), None),               // Au
2345        0x0915 => (Some(Consonant), None),                      // Ka
2346        0x0916 => (Some(Consonant), None),                      // Kha
2347        0x0917 => (Some(Consonant), None),                      // Ga
2348        0x0918 => (Some(Consonant), None),                      // Gha
2349        0x0919 => (Some(Consonant), None),                      // Nga
2350        0x091A => (Some(Consonant), None),                      // Ca
2351        0x091B => (Some(Consonant), None),                      // Cha
2352        0x091C => (Some(Consonant), None),                      // Ja
2353        0x091D => (Some(Consonant), None),                      // Jha
2354        0x091E => (Some(Consonant), None),                      // Nya
2355        0x091F => (Some(Consonant), None),                      // Tta
2356        0x0920 => (Some(Consonant), None),                      // Ttha
2357        0x0921 => (Some(Consonant), None),                      // Dda
2358        0x0922 => (Some(Consonant), None),                      // Ddha
2359        0x0923 => (Some(Consonant), None),                      // Nna
2360        0x0924 => (Some(Consonant), None),                      // Ta
2361        0x0925 => (Some(Consonant), None),                      // Tha
2362        0x0926 => (Some(Consonant), None),                      // Da
2363        0x0927 => (Some(Consonant), None),                      // Dha
2364        0x0928 => (Some(Consonant), None),                      // Na
2365        0x0929 => (Some(Consonant), None),                      // Nnna
2366        0x092A => (Some(Consonant), None),                      // Pa
2367        0x092B => (Some(Consonant), None),                      // Pha
2368        0x092C => (Some(Consonant), None),                      // Ba
2369        0x092D => (Some(Consonant), None),                      // Bha
2370        0x092E => (Some(Consonant), None),                      // Ma
2371        0x092F => (Some(Consonant), None),                      // Ya
2372        0x0930 => (Some(Consonant), None),                      // Ra
2373        0x0931 => (Some(Consonant), None),                      // Rra
2374        0x0932 => (Some(Consonant), None),                      // La
2375        0x0933 => (Some(Consonant), None),                      // Lla
2376        0x0934 => (Some(Consonant), None),                      // Llla
2377        0x0935 => (Some(Consonant), None),                      // Va
2378        0x0936 => (Some(Consonant), None),                      // Sha
2379        0x0937 => (Some(Consonant), None),                      // Ssa
2380        0x0938 => (Some(Consonant), None),                      // Sa
2381        0x0939 => (Some(Consonant), None),                      // Ha
2382        0x093A => (Some(VowelDependent), Some(TopPosition)),    // Sign Oe
2383        0x093B => (Some(VowelDependent), Some(RightPosition)),  // Sign Ooe
2384        0x093C => (Some(Nukta), Some(BottomPosition)),          // Nukta
2385        0x093D => (Some(Avagraha), None),                       // Avagraha
2386        0x093E => (Some(VowelDependent), Some(RightPosition)),  // Sign Aa
2387        0x093F => (Some(VowelDependent), Some(LeftPosition)),   // Sign I
2388        0x0940 => (Some(VowelDependent), Some(RightPosition)),  // Sign Ii
2389        0x0941 => (Some(VowelDependent), Some(BottomPosition)), // Sign U
2390        0x0942 => (Some(VowelDependent), Some(BottomPosition)), // Sign Uu
2391        0x0943 => (Some(VowelDependent), Some(BottomPosition)), // Sign Vocalic R
2392        0x0944 => (Some(VowelDependent), Some(BottomPosition)), // Sign Vocalic Rr
2393        0x0945 => (Some(VowelDependent), Some(TopPosition)),    // Sign Candra E
2394        0x0946 => (Some(VowelDependent), Some(TopPosition)),    // Sign Short E
2395        0x0947 => (Some(VowelDependent), Some(TopPosition)),    // Sign E
2396        0x0948 => (Some(VowelDependent), Some(TopPosition)),    // Sign Ai
2397        0x0949 => (Some(VowelDependent), Some(RightPosition)),  // Sign Candra O
2398        0x094A => (Some(VowelDependent), Some(RightPosition)),  // Sign Short O
2399        0x094B => (Some(VowelDependent), Some(RightPosition)),  // Sign O
2400        0x094C => (Some(VowelDependent), Some(RightPosition)),  // Sign Au
2401        0x094D => (Some(Virama), Some(BottomPosition)),         // Virama
2402        0x094E => (Some(VowelDependent), Some(LeftPosition)),   // Sign Prishthamatra E
2403        0x094F => (Some(VowelDependent), Some(RightPosition)),  // Sign Aw
2404        0x0950 => (None, None),                                 // Om
2405        0x0951 => (Some(Cantillation), Some(TopPosition)),      // Udatta
2406        0x0952 => (Some(Cantillation), Some(BottomPosition)),   // Anudatta
2407        0x0953 => (None, Some(TopPosition)),                    // Grave accent
2408        0x0954 => (None, Some(TopPosition)),                    // Acute accent
2409        0x0955 => (Some(VowelDependent), Some(TopPosition)),    // Sign Candra Long E
2410        0x0956 => (Some(VowelDependent), Some(BottomPosition)), // Sign Ue
2411        0x0957 => (Some(VowelDependent), Some(BottomPosition)), // Sign Uue
2412        0x0958 => (Some(Consonant), None),                      // Qa
2413        0x0959 => (Some(Consonant), None),                      // Khha
2414        0x095A => (Some(Consonant), None),                      // Ghha
2415        0x095B => (Some(Consonant), None),                      // Za
2416        0x095C => (Some(Consonant), None),                      // Dddha
2417        0x095D => (Some(Consonant), None),                      // Rha
2418        0x095E => (Some(Consonant), None),                      // Fa
2419        0x095F => (Some(Consonant), None),                      // Yya
2420        0x0960 => (Some(VowelIndependent), None),               // Vocalic Rr
2421        0x0961 => (Some(VowelIndependent), None),               // Vocalic Ll
2422        0x0962 => (Some(VowelDependent), Some(BottomPosition)), // Sign Vocalic L
2423        0x0963 => (Some(VowelDependent), Some(BottomPosition)), // Sign Vocalic Ll
2424        0x0964 => (None, None),                                 // Danda
2425        0x0965 => (None, None),                                 // Double Danda
2426        0x0966 => (Some(Number), None),                         // Digit Zero
2427        0x0967 => (Some(Number), None),                         // Digit One
2428        0x0968 => (Some(Number), None),                         // Digit Two
2429        0x0969 => (Some(Number), None),                         // Digit Three
2430        0x096A => (Some(Number), None),                         // Digit Four
2431        0x096B => (Some(Number), None),                         // Digit Five
2432        0x096C => (Some(Number), None),                         // Digit Six
2433        0x096D => (Some(Number), None),                         // Digit Seven
2434        0x096E => (Some(Number), None),                         // Digit Eight
2435        0x096F => (Some(Number), None),                         // Digit Nine
2436        0x0970 => (None, None),                                 // Abbreviation Sign
2437        0x0971 => (None, None),                                 // Sign High Spacing Dot
2438        0x0972 => (Some(VowelIndependent), None),               // Candra A
2439        0x0973 => (Some(VowelIndependent), None),               // Oe
2440        0x0974 => (Some(VowelIndependent), None),               // Ooe
2441        0x0975 => (Some(VowelIndependent), None),               // Aw
2442        0x0976 => (Some(VowelIndependent), None),               // Ue
2443        0x0977 => (Some(VowelIndependent), None),               // Uue
2444        0x0978 => (Some(Consonant), None),                      // Marwari Dda
2445        0x0979 => (Some(Consonant), None),                      // Zha
2446        0x097A => (Some(Consonant), None),                      // Heavy Ya
2447        0x097B => (Some(Consonant), None),                      // Gga
2448        0x097C => (Some(Consonant), None),                      // Jja
2449        0x097D => (Some(Consonant), None),                      // Glottal Stop
2450        0x097E => (Some(Consonant), None),                      // Ddda
2451        0x097F => (Some(Consonant), None),                      // Bba
2452
2453        // Bengali character table
2454        0x0980 => (Some(ConsonantPlaceholder), None),                 // Anji
2455        0x0981 => (Some(Bindu), Some(TopPosition)),                   // Candrabindu
2456        0x0982 => (Some(Bindu), Some(RightPosition)),                 // Anusvara
2457        0x0983 => (Some(Visarga), Some(RightPosition)),               // Visarga
2458        0x0984 => (None, None),                                       // unassigned
2459        0x0985 => (Some(VowelIndependent), None),                     // A
2460        0x0986 => (Some(VowelIndependent), None),                     // Aa
2461        0x0987 => (Some(VowelIndependent), None),                     // I
2462        0x0988 => (Some(VowelIndependent), None),                     // Ii
2463        0x0989 => (Some(VowelIndependent), None),                     // U
2464        0x098A => (Some(VowelIndependent), None),                     // Uu
2465        0x098B => (Some(VowelIndependent), None),                     // Vocalic R
2466        0x098C => (Some(VowelIndependent), None),                     // Vocalic L
2467        0x098D => (None, None),                                       // unassigned
2468        0x098E => (None, None),                                       // unassigned
2469        0x098F => (Some(VowelIndependent), None),                     // E
2470        0x0990 => (Some(VowelIndependent), None),                     // Ai
2471        0x0991 => (None, None),                                       // unassigned
2472        0x0992 => (None, None),                                       // unassigned
2473        0x0993 => (Some(VowelIndependent), None),                     // O
2474        0x0994 => (Some(VowelIndependent), None),                     // Au
2475        0x0995 => (Some(Consonant), None),                            // Ka
2476        0x0996 => (Some(Consonant), None),                            // Kha
2477        0x0997 => (Some(Consonant), None),                            // Ga
2478        0x0998 => (Some(Consonant), None),                            // Gha
2479        0x0999 => (Some(Consonant), None),                            // Nga
2480        0x099A => (Some(Consonant), None),                            // Ca
2481        0x099B => (Some(Consonant), None),                            // Cha
2482        0x099C => (Some(Consonant), None),                            // Ja
2483        0x099D => (Some(Consonant), None),                            // Jha
2484        0x099E => (Some(Consonant), None),                            // Nya
2485        0x099F => (Some(Consonant), None),                            // Tta
2486        0x09A0 => (Some(Consonant), None),                            // Ttha
2487        0x09A1 => (Some(Consonant), None),                            // Dda
2488        0x09A2 => (Some(Consonant), None),                            // Ddha
2489        0x09A3 => (Some(Consonant), None),                            // Nna
2490        0x09A4 => (Some(Consonant), None),                            // Ta
2491        0x09A5 => (Some(Consonant), None),                            // Tha
2492        0x09A6 => (Some(Consonant), None),                            // Da
2493        0x09A7 => (Some(Consonant), None),                            // Dha
2494        0x09A8 => (Some(Consonant), None),                            // Na
2495        0x09A9 => (None, None),                                       // unassigned
2496        0x09AA => (Some(Consonant), None),                            // Pa
2497        0x09AB => (Some(Consonant), None),                            // Pha
2498        0x09AC => (Some(Consonant), None),                            // Ba
2499        0x09AD => (Some(Consonant), None),                            // Bha
2500        0x09AE => (Some(Consonant), None),                            // Ma
2501        0x09AF => (Some(Consonant), None),                            // Ya
2502        0x09B0 => (Some(Consonant), None),                            // Ra
2503        0x09B1 => (None, None),                                       // unassigned
2504        0x09B2 => (Some(Consonant), None),                            // La
2505        0x09B3 => (None, None),                                       // unassigned
2506        0x09B4 => (None, None),                                       // unassigned
2507        0x09B5 => (None, None),                                       // unassigned
2508        0x09B6 => (Some(Consonant), None),                            // Sha
2509        0x09B7 => (Some(Consonant), None),                            // Ssa
2510        0x09B8 => (Some(Consonant), None),                            // Sa
2511        0x09B9 => (Some(Consonant), None),                            // Ha
2512        0x09BA => (None, None),                                       // unassigned
2513        0x09BB => (None, None),                                       // unassigned
2514        0x09BC => (Some(Nukta), Some(BottomPosition)),                // Nukta
2515        0x09BD => (Some(Avagraha), None),                             // Avagraha
2516        0x09BE => (Some(VowelDependent), Some(RightPosition)),        // Sign Aa
2517        0x09BF => (Some(VowelDependent), Some(LeftPosition)),         // Sign I
2518        0x09C0 => (Some(VowelDependent), Some(RightPosition)),        // Sign Ii
2519        0x09C1 => (Some(VowelDependent), Some(BottomPosition)),       // Sign U
2520        0x09C2 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Uu
2521        0x09C3 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic R
2522        0x09C4 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic Rr
2523        0x09C5 => (None, None),                                       // unassigned
2524        0x09C6 => (None, None),                                       // unassigned
2525        0x09C7 => (Some(VowelDependent), Some(LeftPosition)),         // Sign E
2526        0x09C8 => (Some(VowelDependent), Some(LeftPosition)),         // Sign Ai
2527        0x09C9 => (None, None),                                       // unassigned
2528        0x09CA => (None, None),                                       // unassigned
2529        0x09CB => (Some(VowelDependent), Some(LeftAndRightPosition)), // Sign O
2530        0x09CC => (Some(VowelDependent), Some(LeftAndRightPosition)), // Sign Au
2531        0x09CD => (Some(Virama), Some(BottomPosition)),               // Virama
2532        0x09CE => (Some(ConsonantDead), None),                        // Khanda Ta
2533        0x09CF => (None, None),                                       // unassigned
2534        0x09D0 => (None, None),                                       // unassigned
2535        0x09D1 => (None, None),                                       // unassigned
2536        0x09D2 => (None, None),                                       // unassigned
2537        0x09D3 => (None, None),                                       // unassigned
2538        0x09D4 => (None, None),                                       // unassigned
2539        0x09D5 => (None, None),                                       // unassigned
2540        0x09D6 => (None, None),                                       // unassigned
2541        0x09D7 => (Some(VowelDependent), Some(RightPosition)),        // Au Length Mark
2542        0x09D8 => (None, None),                                       // unassigned
2543        0x09D9 => (None, None),                                       // unassigned
2544        0x09DA => (None, None),                                       // unassigned
2545        0x09DB => (None, None),                                       // unassigned
2546        0x09DC => (Some(Consonant), None),                            // Rra
2547        0x09DD => (Some(Consonant), None),                            // Rha
2548        0x09DE => (None, None),                                       // unassigned
2549        0x09DF => (Some(Consonant), None),                            // Yya
2550        0x09E0 => (Some(VowelIndependent), None),                     // Vocalic Rr
2551        0x09E1 => (Some(VowelIndependent), None),                     // Vocalic Ll
2552        0x09E2 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic L
2553        0x09E3 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic Ll
2554        0x09E4 => (None, None),                                       // unassigned
2555        0x09E5 => (None, None),                                       // unassigned
2556        0x09E6 => (Some(Number), None),                               // Digit Zero
2557        0x09E7 => (Some(Number), None),                               // Digit One
2558        0x09E8 => (Some(Number), None),                               // Digit Two
2559        0x09E9 => (Some(Number), None),                               // Digit Three
2560        0x09EA => (Some(Number), None),                               // Digit Four
2561        0x09EB => (Some(Number), None),                               // Digit Five
2562        0x09EC => (Some(Number), None),                               // Digit Six
2563        0x09ED => (Some(Number), None),                               // Digit Seven
2564        0x09EE => (Some(Number), None),                               // Digit Eight
2565        0x09EF => (Some(Number), None),                               // Digit Nine
2566        0x09F0 => (Some(Consonant), None),                            // Assamese Ra
2567        0x09F1 => (Some(Consonant), None),                            // Assamese Wa
2568        0x09F2 => (Some(Symbol), None),                               // Rupee Mark
2569        0x09F3 => (Some(Symbol), None),                               // Rupee Sign
2570        0x09F4 => (Some(Number), None),                               // Numerator One
2571        0x09F5 => (Some(Number), None),                               // Numerator Two
2572        0x09F6 => (Some(Number), None),                               // Numerator Three
2573        0x09F7 => (Some(Number), None),                               // Numerator Four
2574        0x09F8 => (Some(Number), None),                               // Numerator One Less Than Denominator
2575        0x09F9 => (Some(Number), None),                               // Denominator Sixteen
2576        0x09FA => (Some(Symbol), None),                               // Isshar
2577        0x09FB => (Some(Symbol), None),                               // Ganda Mark
2578        0x09FC => (None, None),                                       // Vedic Anusvara
2579        0x09FD => (None, None),                                       // Abbreviation Sign
2580        0x09FE => (Some(SyllableModifier), Some(TopPosition)),        // Sandhi Mark
2581
2582        // Gurmukhi character table
2583        0x0A00 => (None, None),                                  // unassigned
2584        0x0A01 => (Some(Bindu), Some(TopPosition)),              // Adak Bindi
2585        0x0A02 => (Some(Bindu), Some(TopPosition)),              // Bindi
2586        0x0A03 => (Some(Visarga), Some(RightPosition)),          // Visarga
2587        0x0A04 => (None, None),                                  // unassigned
2588        0x0A05 => (Some(VowelIndependent), None),                // A
2589        0x0A06 => (Some(VowelIndependent), None),                // Aa
2590        0x0A07 => (Some(VowelIndependent), None),                // I
2591        0x0A08 => (Some(VowelIndependent), None),                // Ii
2592        0x0A09 => (Some(VowelIndependent), None),                // U
2593        0x0A0A => (Some(VowelIndependent), None),                // Uu
2594        0x0A0B => (None, None),                                  // unassigned
2595        0x0A0C => (None, None),                                  // unassigned
2596        0x0A0D => (None, None),                                  // unassigned
2597        0x0A0E => (None, None),                                  // unassigned
2598        0x0A0F => (Some(VowelIndependent), None),                // Ee
2599        0x0A10 => (Some(VowelIndependent), None),                // Ai
2600        0x0A11 => (None, None),                                  // unassigned
2601        0x0A12 => (None, None),                                  // unassigned
2602        0x0A13 => (Some(VowelIndependent), None),                // Oo
2603        0x0A14 => (Some(VowelIndependent), None),                // Au
2604        0x0A15 => (Some(Consonant), None),                       // Ka
2605        0x0A16 => (Some(Consonant), None),                       // Kha
2606        0x0A17 => (Some(Consonant), None),                       // Ga
2607        0x0A18 => (Some(Consonant), None),                       // Gha
2608        0x0A19 => (Some(Consonant), None),                       // Nga
2609        0x0A1A => (Some(Consonant), None),                       // Ca
2610        0x0A1B => (Some(Consonant), None),                       // Cha
2611        0x0A1C => (Some(Consonant), None),                       // Ja
2612        0x0A1D => (Some(Consonant), None),                       // Jha
2613        0x0A1E => (Some(Consonant), None),                       // Nya
2614        0x0A1F => (Some(Consonant), None),                       // Tta
2615        0x0A20 => (Some(Consonant), None),                       // Ttha
2616        0x0A21 => (Some(Consonant), None),                       // Dda
2617        0x0A22 => (Some(Consonant), None),                       // Ddha
2618        0x0A23 => (Some(Consonant), None),                       // Nna
2619        0x0A24 => (Some(Consonant), None),                       // Ta
2620        0x0A25 => (Some(Consonant), None),                       // Tha
2621        0x0A26 => (Some(Consonant), None),                       // Da
2622        0x0A27 => (Some(Consonant), None),                       // Dha
2623        0x0A28 => (Some(Consonant), None),                       // Na
2624        0x0A29 => (None, None),                                  // unassigned
2625        0x0A2A => (Some(Consonant), None),                       // Pa
2626        0x0A2B => (Some(Consonant), None),                       // Pha
2627        0x0A2C => (Some(Consonant), None),                       // Ba
2628        0x0A2D => (Some(Consonant), None),                       // Bha
2629        0x0A2E => (Some(Consonant), None),                       // Ma
2630        0x0A2F => (Some(Consonant), None),                       // Ya
2631        0x0A30 => (Some(Consonant), None),                       // Ra
2632        0x0A31 => (None, None),                                  // unassigned
2633        0x0A32 => (Some(Consonant), None),                       // La
2634        0x0A33 => (Some(Consonant), None),                       // Lla
2635        0x0A34 => (None, None),                                  // unassigned
2636        0x0A35 => (Some(Consonant), None),                       // Va
2637        0x0A36 => (Some(Consonant), None),                       // Sha
2638        0x0A37 => (None, None),                                  // unassigned
2639        0x0A38 => (Some(Consonant), None),                       // Sa
2640        0x0A39 => (Some(Consonant), None),                       // Ha
2641        0x0A3A => (None, None),                                  // unassigned
2642        0x0A3B => (None, None),                                  // unassigned
2643        0x0A3C => (Some(Nukta), Some(BottomPosition)),           // Nukta
2644        0x0A3D => (None, None),                                  // unassigned
2645        0x0A3E => (Some(VowelDependent), Some(RightPosition)),   // Sign Aa
2646        0x0A3F => (Some(VowelDependent), Some(LeftPosition)),    // Sign I
2647        0x0A40 => (Some(VowelDependent), Some(RightPosition)),   // Sign Ii
2648        0x0A41 => (Some(VowelDependent), Some(BottomPosition)),  // Sign U
2649        0x0A42 => (Some(VowelDependent), Some(BottomPosition)),  // Sign Uu
2650        0x0A43 => (None, None),                                  // unassigned
2651        0x0A44 => (None, None),                                  // unassigned
2652        0x0A45 => (None, None),                                  // unassigned
2653        0x0A46 => (None, None),                                  // unassigned
2654        0x0A47 => (Some(VowelDependent), Some(TopPosition)),     // Sign Ee
2655        0x0A48 => (Some(VowelDependent), Some(TopPosition)),     // Sign Ai
2656        0x0A49 => (None, None),                                  // unassigned
2657        0x0A4A => (None, None),                                  // unassigned
2658        0x0A4B => (Some(VowelDependent), Some(TopPosition)),     // Sign Oo
2659        0x0A4C => (Some(VowelDependent), Some(TopPosition)),     // Sign Au
2660        0x0A4D => (Some(Virama), Some(BottomPosition)),          // Virama
2661        0x0A4E => (None, None),                                  // unassigned
2662        0x0A4F => (None, None),                                  // unassigned
2663        0x0A50 => (None, None),                                  // unassigned
2664        0x0A51 => (Some(Cantillation), None),                    // Udaat
2665        0x0A52 => (None, None),                                  // unassigned
2666        0x0A53 => (None, None),                                  // unassigned
2667        0x0A54 => (None, None),                                  // unassigned
2668        0x0A55 => (None, None),                                  // unassigned
2669        0x0A56 => (None, None),                                  // unassigned
2670        0x0A57 => (None, None),                                  // unassigned
2671        0x0A58 => (None, None),                                  // unassigned
2672        0x0A59 => (Some(Consonant), None),                       // Khha
2673        0x0A5A => (Some(Consonant), None),                       // Ghha
2674        0x0A5B => (Some(Consonant), None),                       // Za
2675        0x0A5C => (Some(Consonant), None),                       // Rra
2676        0x0A5D => (None, None),                                  // unassigned
2677        0x0A5E => (Some(Consonant), None),                       // Fa
2678        0x0A5F => (None, None),                                  // unassigned
2679        0x0A60 => (None, None),                                  // unassigned
2680        0x0A61 => (None, None),                                  // unassigned
2681        0x0A62 => (None, None),                                  // unassigned
2682        0x0A63 => (None, None),                                  // unassigned
2683        0x0A64 => (None, None),                                  // unassigned
2684        0x0A65 => (None, None),                                  // unassigned
2685        0x0A66 => (Some(Number), None),                          // Digit Zero
2686        0x0A67 => (Some(Number), None),                          // Digit One
2687        0x0A68 => (Some(Number), None),                          // Digit Two
2688        0x0A69 => (Some(Number), None),                          // Digit Three
2689        0x0A6A => (Some(Number), None),                          // Digit Four
2690        0x0A6B => (Some(Number), None),                          // Digit Five
2691        0x0A6C => (Some(Number), None),                          // Digit Six
2692        0x0A6D => (Some(Number), None),                          // Digit Seven
2693        0x0A6E => (Some(Number), None),                          // Digit Eight
2694        0x0A6F => (Some(Number), None),                          // Digit Nine
2695        0x0A70 => (Some(Bindu), Some(TopPosition)),              // Tippi
2696        0x0A71 => (Some(GeminationMark), Some(TopPosition)),     // Addak
2697        0x0A72 => (Some(ConsonantPlaceholder), None),            // Iri
2698        0x0A73 => (Some(ConsonantPlaceholder), None),            // Ura
2699        0x0A74 => (None, None),                                  // Ek Onkar
2700        0x0A75 => (Some(ConsonantMedial), Some(BottomPosition)), // Yakash
2701        0x0A76 => (None, None),                                  // Abbreviation Sign
2702
2703        // Gujarati character table
2704        0x0A81 => (Some(Bindu), Some(TopPosition)),                  // Candrabindu
2705        0x0A82 => (Some(Bindu), Some(TopPosition)),                  // Anusvara
2706        0x0A83 => (Some(Visarga), Some(RightPosition)),              // Visarga
2707        0x0A84 => (None, None),                                      // unassigned
2708        0x0A85 => (Some(VowelIndependent), None),                    // A
2709        0x0A86 => (Some(VowelIndependent), None),                    // Aa
2710        0x0A87 => (Some(VowelIndependent), None),                    // I
2711        0x0A88 => (Some(VowelIndependent), None),                    // Ii
2712        0x0A89 => (Some(VowelIndependent), None),                    // U
2713        0x0A8A => (Some(VowelIndependent), None),                    // Uu
2714        0x0A8B => (Some(VowelIndependent), None),                    // Vocalic R
2715        0x0A8C => (Some(VowelIndependent), None),                    // Vocalic L
2716        0x0A8D => (Some(VowelIndependent), None),                    // Candra E
2717        0x0A8E => (None, None),                                      // unassigned
2718        0x0A8F => (Some(VowelIndependent), None),                    // E
2719        0x0A90 => (Some(VowelIndependent), None),                    // Ai
2720        0x0A91 => (Some(VowelIndependent), None),                    // Candra O
2721        0x0A92 => (None, None),                                      // unassigned
2722        0x0A93 => (Some(VowelIndependent), None),                    // O
2723        0x0A94 => (Some(VowelIndependent), None),                    // Au
2724        0x0A95 => (Some(Consonant), None),                           // Ka
2725        0x0A96 => (Some(Consonant), None),                           // Kha
2726        0x0A97 => (Some(Consonant), None),                           // Ga
2727        0x0A98 => (Some(Consonant), None),                           // Gha
2728        0x0A99 => (Some(Consonant), None),                           // Nga
2729        0x0A9A => (Some(Consonant), None),                           // Ca
2730        0x0A9B => (Some(Consonant), None),                           // Cha
2731        0x0A9C => (Some(Consonant), None),                           // Ja
2732        0x0A9D => (Some(Consonant), None),                           // Jha
2733        0x0A9E => (Some(Consonant), None),                           // Nya
2734        0x0A9F => (Some(Consonant), None),                           // Tta
2735        0x0AA0 => (Some(Consonant), None),                           // Ttha
2736        0x0AA1 => (Some(Consonant), None),                           // Dda
2737        0x0AA2 => (Some(Consonant), None),                           // Ddha
2738        0x0AA3 => (Some(Consonant), None),                           // Nna
2739        0x0AA4 => (Some(Consonant), None),                           // Ta
2740        0x0AA5 => (Some(Consonant), None),                           // Tha
2741        0x0AA6 => (Some(Consonant), None),                           // Da
2742        0x0AA7 => (Some(Consonant), None),                           // Dha
2743        0x0AA8 => (Some(Consonant), None),                           // Na
2744        0x0AA9 => (None, None),                                      // unassigned
2745        0x0AAA => (Some(Consonant), None),                           // Pa
2746        0x0AAB => (Some(Consonant), None),                           // Pha
2747        0x0AAC => (Some(Consonant), None),                           // Ba
2748        0x0AAD => (Some(Consonant), None),                           // Bha
2749        0x0AAE => (Some(Consonant), None),                           // Ma
2750        0x0AAF => (Some(Consonant), None),                           // Ya
2751        0x0AB0 => (Some(Consonant), None),                           // Ra
2752        0x0AB1 => (None, None),                                      // unassigned
2753        0x0AB2 => (Some(Consonant), None),                           // La
2754        0x0AB3 => (Some(Consonant), None),                           // Lla
2755        0x0AB4 => (None, None),                                      // unassigned
2756        0x0AB5 => (Some(Consonant), None),                           // Va
2757        0x0AB6 => (Some(Consonant), None),                           // Sha
2758        0x0AB7 => (Some(Consonant), None),                           // Ssa
2759        0x0AB8 => (Some(Consonant), None),                           // Sa
2760        0x0AB9 => (Some(Consonant), None),                           // Ha
2761        0x0ABA => (None, None),                                      // unassigned
2762        0x0ABB => (None, None),                                      // unassigned
2763        0x0ABC => (Some(Nukta), Some(BottomPosition)),               // Nukta
2764        0x0ABD => (Some(Avagraha), None),                            // Avagraha
2765        0x0ABE => (Some(VowelDependent), Some(RightPosition)),       // Sign Aa
2766        0x0ABF => (Some(VowelDependent), Some(LeftPosition)),        // Sign I
2767        0x0AC0 => (Some(VowelDependent), Some(RightPosition)),       // Sign Ii
2768        0x0AC1 => (Some(VowelDependent), Some(BottomPosition)),      // Sign U
2769        0x0AC2 => (Some(VowelDependent), Some(BottomPosition)),      // Sign Uu
2770        0x0AC3 => (Some(VowelDependent), Some(BottomPosition)),      // Sign Vocalic R
2771        0x0AC4 => (Some(VowelDependent), Some(BottomPosition)),      // Sign Vocalic Rr
2772        0x0AC5 => (Some(VowelDependent), Some(TopPosition)),         // Sign Candra E
2773        0x0AC6 => (None, None),                                      // unassigned
2774        0x0AC7 => (Some(VowelDependent), Some(TopPosition)),         // Sign E
2775        0x0AC8 => (Some(VowelDependent), Some(TopPosition)),         // Sign Ai
2776        0x0AC9 => (Some(VowelDependent), Some(TopAndRightPosition)), // Sign Candra O
2777        0x0ACA => (None, None),                                      // unassigned
2778        0x0ACB => (Some(VowelDependent), Some(RightPosition)),       // Sign O
2779        0x0ACC => (Some(VowelDependent), Some(RightPosition)),       // Sign Au
2780        0x0ACD => (Some(Virama), Some(BottomPosition)),              // Virama
2781        0x0ACE => (None, None),                                      // unassigned
2782        0x0ACF => (None, None),                                      // unassigned
2783        0x0AD0 => (None, None),                                      // Om
2784        0x0AD1 => (None, None),                                      // unassigned
2785        0x0AD2 => (None, None),                                      // unassigned
2786        0x0AD3 => (None, None),                                      // unassigned
2787        0x0AD4 => (None, None),                                      // unassigned
2788        0x0AD5 => (None, None),                                      // unassigned
2789        0x0AD6 => (None, None),                                      // unassigned
2790        0x0AD7 => (None, None),                                      // unassigned
2791        0x0AD8 => (None, None),                                      // unassigned
2792        0x0AD9 => (None, None),                                      // unassigned
2793        0x0ADA => (None, None),                                      // unassigned
2794        0x0ADB => (None, None),                                      // unassigned
2795        0x0ADC => (None, None),                                      // unassigned
2796        0x0ADD => (None, None),                                      // unassigned
2797        0x0ADE => (None, None),                                      // unassigned
2798        0x0ADF => (None, None),                                      // unassigned
2799        0x0AE0 => (Some(VowelIndependent), None),                    // Vocalic Rr
2800        0x0AE1 => (Some(VowelIndependent), None),                    // Vocalic Ll
2801        0x0AE2 => (Some(VowelDependent), Some(BottomPosition)),      // Sign Vocalic L
2802        0x0AE3 => (Some(VowelDependent), Some(BottomPosition)),      // Sign Vocalic Ll
2803        0x0AE4 => (None, None),                                      // unassigned
2804        0x0AE5 => (None, None),                                      // unassigned
2805        0x0AE6 => (Some(Number), None),                              // Digit Zero
2806        0x0AE7 => (Some(Number), None),                              // Digit One
2807        0x0AE8 => (Some(Number), None),                              // Digit Two
2808        0x0AE9 => (Some(Number), None),                              // Digit Three
2809        0x0AEA => (Some(Number), None),                              // Digit Four
2810        0x0AEB => (Some(Number), None),                              // Digit Five
2811        0x0AEC => (Some(Number), None),                              // Digit Six
2812        0x0AED => (Some(Number), None),                              // Digit Seven
2813        0x0AEE => (Some(Number), None),                              // Digit Eight
2814        0x0AEF => (Some(Number), None),                              // Digit Nine
2815        0x0AF0 => (Some(Symbol), None),                              // Abbreviation
2816        0x0AF1 => (Some(Symbol), None),                              // Rupee Sign
2817        0x0AF2 => (None, None),                                      // unassigned
2818        0x0AF3 => (None, None),                                      // unassigned
2819        0x0AF4 => (None, None),                                      // unassigned
2820        0x0AF5 => (None, None),                                      // unassigned
2821        0x0AF6 => (None, None),                                      // unassigned
2822        0x0AF7 => (None, None),                                      // unassigned
2823        0x0AF8 => (None, None),                                      // unassigned
2824        0x0AF9 => (Some(Consonant), None),                           // Zha
2825        0x0AFA => (Some(Cantillation), Some(TopPosition)),           // Sukun
2826        0x0AFB => (Some(Cantillation), Some(TopPosition)),           // Shadda
2827        0x0AFC => (Some(Cantillation), Some(TopPosition)),           // Maddah
2828        0x0AFD => (Some(Nukta), Some(TopPosition)),                  // Three-Dot Nukta Above
2829        0x0AFE => (Some(Nukta), Some(TopPosition)),                  // Circle Nukta Above
2830        0x0AFF => (Some(Nukta), Some(TopPosition)),                  // Two-Circle Nukta Above
2831
2832        // Oriya character table
2833        0x0B00 => (None, None),                                          // unassigned
2834        0x0B01 => (Some(Bindu), Some(TopPosition)),                      // Candrabindu
2835        0x0B02 => (Some(Bindu), Some(RightPosition)),                    // Anusvara
2836        0x0B03 => (Some(Visarga), Some(RightPosition)),                  // Visarga
2837        0x0B04 => (None, None),                                          // unassigned
2838        0x0B05 => (Some(VowelIndependent), None),                        // A
2839        0x0B06 => (Some(VowelIndependent), None),                        // Aa
2840        0x0B07 => (Some(VowelIndependent), None),                        // I
2841        0x0B08 => (Some(VowelIndependent), None),                        // Ii
2842        0x0B09 => (Some(VowelIndependent), None),                        // U
2843        0x0B0A => (Some(VowelIndependent), None),                        // Uu
2844        0x0B0B => (Some(VowelIndependent), None),                        // Vocalic R
2845        0x0B0C => (Some(VowelIndependent), None),                        // Vocalic L
2846        0x0B0D => (None, None),                                          // unassigned
2847        0x0B0E => (None, None),                                          // unassigned
2848        0x0B0F => (Some(VowelIndependent), None),                        // E
2849        0x0B10 => (Some(VowelIndependent), None),                        // Ai
2850        0x0B11 => (None, None),                                          // unassigned
2851        0x0B12 => (None, None),                                          // unassigned
2852        0x0B13 => (Some(VowelIndependent), None),                        // O
2853        0x0B14 => (Some(VowelIndependent), None),                        // Au
2854        0x0B15 => (Some(Consonant), None),                               // Ka
2855        0x0B16 => (Some(Consonant), None),                               // Kha
2856        0x0B17 => (Some(Consonant), None),                               // Ga
2857        0x0B18 => (Some(Consonant), None),                               // Gha
2858        0x0B19 => (Some(Consonant), None),                               // Nga
2859        0x0B1A => (Some(Consonant), None),                               // Ca
2860        0x0B1B => (Some(Consonant), None),                               // Cha
2861        0x0B1C => (Some(Consonant), None),                               // Ja
2862        0x0B1D => (Some(Consonant), None),                               // Jha
2863        0x0B1E => (Some(Consonant), None),                               // Nya
2864        0x0B1F => (Some(Consonant), None),                               // Tta
2865        0x0B20 => (Some(Consonant), None),                               // Ttha
2866        0x0B21 => (Some(Consonant), None),                               // Dda
2867        0x0B22 => (Some(Consonant), None),                               // Ddha
2868        0x0B23 => (Some(Consonant), None),                               // Nna
2869        0x0B24 => (Some(Consonant), None),                               // Ta
2870        0x0B25 => (Some(Consonant), None),                               // Tha
2871        0x0B26 => (Some(Consonant), None),                               // Da
2872        0x0B27 => (Some(Consonant), None),                               // Dha
2873        0x0B28 => (Some(Consonant), None),                               // Na
2874        0x0B29 => (None, None),                                          // unassigned
2875        0x0B2A => (Some(Consonant), None),                               // Pa
2876        0x0B2B => (Some(Consonant), None),                               // Pha
2877        0x0B2C => (Some(Consonant), None),                               // Ba
2878        0x0B2D => (Some(Consonant), None),                               // Bha
2879        0x0B2E => (Some(Consonant), None),                               // Ma
2880        0x0B2F => (Some(Consonant), None),                               // Ya
2881        0x0B30 => (Some(Consonant), None),                               // Ra
2882        0x0B31 => (None, None),                                          // unassigned
2883        0x0B32 => (Some(Consonant), None),                               // La
2884        0x0B33 => (Some(Consonant), None),                               // Lla
2885        0x0B34 => (None, None),                                          // unassigned
2886        0x0B35 => (Some(Consonant), None),                               // Va
2887        0x0B36 => (Some(Consonant), None),                               // Sha
2888        0x0B37 => (Some(Consonant), None),                               // Ssa
2889        0x0B38 => (Some(Consonant), None),                               // Sa
2890        0x0B39 => (Some(Consonant), None),                               // Ha
2891        0x0B3A => (None, None),                                          // unassigned
2892        0x0B3B => (None, None),                                          // unassigned
2893        0x0B3C => (Some(Nukta), Some(BottomPosition)),                   // Nukta
2894        0x0B3D => (Some(Avagraha), None),                                // Avagraha
2895        0x0B3E => (Some(VowelDependent), Some(RightPosition)),           // Sign Aa
2896        0x0B3F => (Some(VowelDependent), Some(TopPosition)),             // Sign I
2897        0x0B40 => (Some(VowelDependent), Some(RightPosition)),           // Sign Ii
2898        0x0B41 => (Some(VowelDependent), Some(BottomPosition)),          // Sign U
2899        0x0B42 => (Some(VowelDependent), Some(BottomPosition)),          // Sign Uu
2900        0x0B43 => (Some(VowelDependent), Some(BottomPosition)),          // Sign Vocalic R
2901        0x0B44 => (Some(VowelDependent), Some(BottomPosition)),          // Sign Vocalic Rr
2902        0x0B45 => (None, None),                                          // unassigned
2903        0x0B46 => (None, None),                                          // unassigned
2904        0x0B47 => (Some(VowelDependent), Some(LeftPosition)),            // Sign E
2905        0x0B48 => (Some(VowelDependent), Some(TopAndLeftPosition)),      // Sign Ai
2906        0x0B49 => (None, None),                                          // unassigned
2907        0x0B4A => (None, None),                                          // unassigned
2908        0x0B4B => (Some(VowelDependent), Some(LeftAndRightPosition)),    // Sign O
2909        0x0B4C => (Some(VowelDependent), Some(TopLeftAndRightPosition)), // Sign Au
2910        0x0B4D => (Some(Virama), Some(BottomPosition)),                  // Virama
2911        0x0B4E => (None, None),                                          // unassigned
2912        0x0B4F => (None, None),                                          // unassigned
2913        0x0B50 => (None, None),                                          // unassigned
2914        0x0B51 => (None, None),                                          // unassigned
2915        0x0B52 => (None, None),                                          // unassigned
2916        0x0B53 => (None, None),                                          // unassigned
2917        0x0B54 => (None, None),                                          // unassigned
2918        0x0B55 => (None, None),                                          // Sign Overline (Unicode 13.0), unclassified
2919        0x0B56 => (Some(VowelDependent), Some(TopPosition)),             // Ai Length Mark
2920        0x0B57 => (Some(VowelDependent), Some(TopAndRightPosition)),     // Au Length Mark
2921        0x0B58 => (None, None),                                          // unassigned
2922        0x0B59 => (None, None),                                          // unassigned
2923        0x0B5A => (None, None),                                          // unassigned
2924        0x0B5B => (None, None),                                          // unassigned
2925        0x0B5C => (Some(Consonant), None),                               // Rra
2926        0x0B5D => (Some(Consonant), None),                               // Rha
2927        0x0B5E => (None, None),                                          // unassigned
2928        0x0B5F => (Some(Consonant), None),                               // Yya
2929        0x0B60 => (Some(VowelIndependent), None),                        // Vocalic Rr
2930        0x0B61 => (Some(VowelIndependent), None),                        // Vocalic Ll
2931        0x0B62 => (Some(VowelDependent), Some(BottomPosition)),          // Sign Vocalic L
2932        0x0B63 => (Some(VowelDependent), Some(BottomPosition)),          // Sign Vocalic Ll
2933        0x0B64 => (None, None),                                          // unassigned
2934        0x0B65 => (None, None),                                          // unassigned
2935        0x0B66 => (Some(Number), None),                                  // Digit Zero
2936        0x0B67 => (Some(Number), None),                                  // Digit One
2937        0x0B68 => (Some(Number), None),                                  // Digit Two
2938        0x0B69 => (Some(Number), None),                                  // Digit Three
2939        0x0B6A => (Some(Number), None),                                  // Digit Four
2940        0x0B6B => (Some(Number), None),                                  // Digit Five
2941        0x0B6C => (Some(Number), None),                                  // Digit Six
2942        0x0B6D => (Some(Number), None),                                  // Digit Seven
2943        0x0B6E => (Some(Number), None),                                  // Digit Eight
2944        0x0B6F => (Some(Number), None),                                  // Digit Nine
2945        0x0B70 => (Some(Symbol), None),                                  // Isshar
2946        0x0B71 => (Some(Consonant), None),                               // Wa
2947        0x0B72 => (Some(Number), None),                                  // Fraction 1/4
2948        0x0B73 => (Some(Number), None),                                  // Fraction 1/2
2949        0x0B74 => (Some(Number), None),                                  // Fraction 3/4
2950        0x0B75 => (Some(Number), None),                                  // Fraction 1/16
2951        0x0B76 => (Some(Number), None),                                  // Fraction 1/8
2952        0x0B77 => (Some(Number), None),                                  // Fraction 3/16
2953        0x0B78 => (None, None),                                          // unassigned
2954        0x0B79 => (None, None),                                          // unassigned
2955        0x0B7A => (None, None),                                          // unassigned
2956        0x0B7B => (None, None),                                          // unassigned
2957        0x0B7C => (None, None),                                          // unassigned
2958        0x0B7D => (None, None),                                          // unassigned
2959        0x0B7E => (None, None),                                          // unassigned
2960        0x0B7F => (None, None),                                          // unassigned
2961
2962        // Tamil character table
2963        0x0B80 => (None, None),                                       // unassigned
2964        0x0B81 => (None, None),                                       // unassigned
2965        0x0B82 => (Some(Bindu), Some(TopPosition)),                   // Anusvara
2966        0x0B83 => (Some(ModifyingLetter), None),                      // Visarga
2967        0x0B84 => (None, None),                                       // unassigned
2968        0x0B85 => (Some(VowelIndependent), None),                     // A
2969        0x0B86 => (Some(VowelIndependent), None),                     // Aa
2970        0x0B87 => (Some(VowelIndependent), None),                     // I
2971        0x0B88 => (Some(VowelIndependent), None),                     // Ii
2972        0x0B89 => (Some(VowelIndependent), None),                     // U
2973        0x0B8A => (Some(VowelIndependent), None),                     // Uu
2974        0x0B8B => (None, None),                                       // unassigned
2975        0x0B8C => (None, None),                                       // unassigned
2976        0x0B8D => (None, None),                                       // unassigned
2977        0x0B8E => (Some(VowelIndependent), None),                     // E
2978        0x0B8F => (Some(VowelIndependent), None),                     // Ee
2979        0x0B90 => (Some(VowelIndependent), None),                     // Ai
2980        0x0B91 => (None, None),                                       // unassigned
2981        0x0B92 => (Some(VowelIndependent), None),                     // O
2982        0x0B93 => (Some(VowelIndependent), None),                     // Oo
2983        0x0B94 => (Some(VowelIndependent), None),                     // Au
2984        0x0B95 => (Some(Consonant), None),                            // Ka
2985        0x0B96 => (None, None),                                       // unassigned
2986        0x0B97 => (None, None),                                       // unassigned
2987        0x0B98 => (None, None),                                       // unassigned
2988        0x0B99 => (Some(Consonant), None),                            // Nga
2989        0x0B9A => (Some(Consonant), None),                            // Ca
2990        0x0B9B => (None, None),                                       // unassigned
2991        0x0B9C => (Some(Consonant), None),                            // Ja
2992        0x0B9D => (None, None),                                       // unassigned
2993        0x0B9E => (Some(Consonant), None),                            // Nya
2994        0x0B9F => (Some(Consonant), None),                            // Tta
2995        0x0BA0 => (None, None),                                       // unassigned
2996        0x0BA1 => (None, None),                                       // unassigned
2997        0x0BA2 => (None, None),                                       // unassigned
2998        0x0BA3 => (Some(Consonant), None),                            // Nna
2999        0x0BA4 => (Some(Consonant), None),                            // Ta
3000        0x0BA5 => (None, None),                                       // unassigned
3001        0x0BA6 => (None, None),                                       // unassigned
3002        0x0BA7 => (None, None),                                       // unassigned
3003        0x0BA8 => (Some(Consonant), None),                            // Na
3004        0x0BA9 => (Some(Consonant), None),                            // Nnna
3005        0x0BAA => (Some(Consonant), None),                            // Pa
3006        0x0BAB => (None, None),                                       // unassigned
3007        0x0BAC => (None, None),                                       // unassigned
3008        0x0BAD => (None, None),                                       // unassigned
3009        0x0BAE => (Some(Consonant), None),                            // Ma
3010        0x0BAF => (Some(Consonant), None),                            // Ya
3011        0x0BB0 => (Some(Consonant), None),                            // Ra
3012        0x0BB1 => (Some(Consonant), None),                            // Rra
3013        0x0BB2 => (Some(Consonant), None),                            // La
3014        0x0BB3 => (Some(Consonant), None),                            // Lla
3015        0x0BB4 => (Some(Consonant), None),                            // Llla
3016        0x0BB5 => (Some(Consonant), None),                            // Va
3017        0x0BB6 => (Some(Consonant), None),                            // Sha
3018        0x0BB7 => (Some(Consonant), None),                            // Ssa
3019        0x0BB8 => (Some(Consonant), None),                            // Sa
3020        0x0BB9 => (Some(Consonant), None),                            // Ha
3021        0x0BBA => (None, None),                                       // unassigned
3022        0x0BBB => (None, None),                                       // unassigned
3023        0x0BBC => (None, None),                                       // unassigned
3024        0x0BBD => (None, None),                                       // unassigned
3025        0x0BBE => (Some(VowelDependent), Some(RightPosition)),        // Sign Aa
3026        0x0BBF => (Some(VowelDependent), Some(RightPosition)),        // Sign I
3027        0x0BC0 => (Some(VowelDependent), Some(TopPosition)),          // Sign Ii
3028        0x0BC1 => (Some(VowelDependent), Some(RightPosition)),        // Sign U
3029        0x0BC2 => (Some(VowelDependent), Some(RightPosition)),        // Sign Uu
3030        0x0BC3 => (None, None),                                       // unassigned
3031        0x0BC4 => (None, None),                                       // unassigned
3032        0x0BC5 => (None, None),                                       // unassigned
3033        0x0BC6 => (Some(VowelDependent), Some(LeftPosition)),         // Sign E
3034        0x0BC7 => (Some(VowelDependent), Some(LeftPosition)),         // Sign Ee
3035        0x0BC8 => (Some(VowelDependent), Some(LeftPosition)),         // Sign Ai
3036        0x0BC9 => (None, None),                                       // unassigned
3037        0x0BCA => (Some(VowelDependent), Some(LeftAndRightPosition)), // Sign O
3038        0x0BCB => (Some(VowelDependent), Some(LeftAndRightPosition)), // Sign Oo
3039        0x0BCC => (Some(VowelDependent), Some(LeftAndRightPosition)), // Sign Au
3040        0x0BCD => (Some(Virama), Some(TopPosition)),                  // Virama
3041        0x0BCE => (None, None),                                       // unassigned
3042        0x0BCF => (None, None),                                       // unassigned
3043        0x0BD0 => (None, None),                                       // Om
3044        0x0BD1 => (None, None),                                       // unassigned
3045        0x0BD2 => (None, None),                                       // unassigned
3046        0x0BD3 => (None, None),                                       // unassigned
3047        0x0BD4 => (None, None),                                       // unassigned
3048        0x0BD5 => (None, None),                                       // unassigned
3049        0x0BD6 => (None, None),                                       // unassigned
3050        0x0BD7 => (Some(VowelDependent), Some(RightPosition)),        // Au Length Mark
3051        0x0BD8 => (None, None),                                       // unassigned
3052        0x0BD9 => (None, None),                                       // unassigned
3053        0x0BDA => (None, None),                                       // unassigned
3054        0x0BDB => (None, None),                                       // unassigned
3055        0x0BDC => (None, None),                                       // unassigned
3056        0x0BDD => (None, None),                                       // unassigned
3057        0x0BDE => (None, None),                                       // unassigned
3058        0x0BDF => (None, None),                                       // unassigned
3059        0x0BE0 => (None, None),                                       // unassigned
3060        0x0BE1 => (None, None),                                       // unassigned
3061        0x0BE2 => (None, None),                                       // unassigned
3062        0x0BE3 => (None, None),                                       // unassigned
3063        0x0BE4 => (None, None),                                       // unassigned
3064        0x0BE5 => (None, None),                                       // unassigned
3065        0x0BE6 => (Some(Number), None),                               // Digit Zero
3066        0x0BE7 => (Some(Number), None),                               // Digit One
3067        0x0BE8 => (Some(Number), None),                               // Digit Two
3068        0x0BE9 => (Some(Number), None),                               // Digit Three
3069        0x0BEA => (Some(Number), None),                               // Digit Four
3070        0x0BEB => (Some(Number), None),                               // Digit Five
3071        0x0BEC => (Some(Number), None),                               // Digit Six
3072        0x0BED => (Some(Number), None),                               // Digit Seven
3073        0x0BEE => (Some(Number), None),                               // Digit Eight
3074        0x0BEF => (Some(Number), None),                               // Digit Nine
3075        0x0BF0 => (Some(Number), None),                               // Number Ten
3076        0x0BF1 => (Some(Number), None),                               // Number One Hundred
3077        0x0BF2 => (Some(Number), None),                               // Number One Thousand
3078        0x0BF3 => (Some(Symbol), None),                               // Day Sign
3079        0x0BF4 => (Some(Symbol), None),                               // Month Sign
3080        0x0BF5 => (Some(Symbol), None),                               // Year Sign
3081        0x0BF6 => (Some(Symbol), None),                               // Debit Sign
3082        0x0BF7 => (Some(Symbol), None),                               // Credit Sign
3083        0x0BF8 => (Some(Symbol), None),                               // As Above Sign
3084        0x0BF9 => (Some(Symbol), None),                               // Tamil Rupee Sign
3085        0x0BFA => (Some(Symbol), None),                               // Number Sign
3086
3087        // Telugu character table
3088        0x0C00 => (Some(Bindu), Some(TopPosition)),                   // Combining Candrabindu Above
3089        0x0C01 => (Some(Bindu), Some(RightPosition)),                 // Candrabindu
3090        0x0C02 => (Some(Bindu), Some(RightPosition)),                 // Anusvara
3091        0x0C03 => (Some(Visarga), Some(RightPosition)),               // Visarga
3092        0x0C04 => (Some(Bindu), Some(TopPosition)),                   // Combining Anusvara Above
3093        0x0C05 => (Some(VowelIndependent), None),                     // A
3094        0x0C06 => (Some(VowelIndependent), None),                     // Aa
3095        0x0C07 => (Some(VowelIndependent), None),                     // I
3096        0x0C08 => (Some(VowelIndependent), None),                     // Ii
3097        0x0C09 => (Some(VowelIndependent), None),                     // U
3098        0x0C0A => (Some(VowelIndependent), None),                     // Uu
3099        0x0C0B => (Some(VowelIndependent), None),                     // Vocalic R
3100        0x0C0C => (Some(VowelIndependent), None),                     // Vocalic L
3101        0x0C0D => (None, None),                                       // unassigned
3102        0x0C0E => (Some(VowelIndependent), None),                     // E
3103        0x0C0F => (Some(VowelIndependent), None),                     // Ee
3104        0x0C10 => (Some(VowelIndependent), None),                     // Ai
3105        0x0C11 => (None, None),                                       // unassigned
3106        0x0C12 => (Some(VowelIndependent), None),                     // O
3107        0x0C13 => (Some(VowelIndependent), None),                     // Oo
3108        0x0C14 => (Some(VowelIndependent), None),                     // Au
3109        0x0C15 => (Some(Consonant), None),                            // Ka
3110        0x0C16 => (Some(Consonant), None),                            // Kha
3111        0x0C17 => (Some(Consonant), None),                            // Ga
3112        0x0C18 => (Some(Consonant), None),                            // Gha
3113        0x0C19 => (Some(Consonant), None),                            // Nga
3114        0x0C1A => (Some(Consonant), None),                            // Ca
3115        0x0C1B => (Some(Consonant), None),                            // Cha
3116        0x0C1C => (Some(Consonant), None),                            // Ja
3117        0x0C1D => (Some(Consonant), None),                            // Jha
3118        0x0C1E => (Some(Consonant), None),                            // Nya
3119        0x0C1F => (Some(Consonant), None),                            // Tta
3120        0x0C20 => (Some(Consonant), None),                            // Ttha
3121        0x0C21 => (Some(Consonant), None),                            // Dda
3122        0x0C22 => (Some(Consonant), None),                            // Ddha
3123        0x0C23 => (Some(Consonant), None),                            // Nna
3124        0x0C24 => (Some(Consonant), None),                            // Ta
3125        0x0C25 => (Some(Consonant), None),                            // Tha
3126        0x0C26 => (Some(Consonant), None),                            // Da
3127        0x0C27 => (Some(Consonant), None),                            // Dha
3128        0x0C28 => (Some(Consonant), None),                            // Na
3129        0x0C29 => (None, None),                                       // unassigned
3130        0x0C2A => (Some(Consonant), None),                            // Pa
3131        0x0C2B => (Some(Consonant), None),                            // Pha
3132        0x0C2C => (Some(Consonant), None),                            // Ba
3133        0x0C2D => (Some(Consonant), None),                            // Bha
3134        0x0C2E => (Some(Consonant), None),                            // Ma
3135        0x0C2F => (Some(Consonant), None),                            // Ya
3136        0x0C30 => (Some(Consonant), None),                            // Ra
3137        0x0C31 => (Some(Consonant), None),                            // Rra
3138        0x0C32 => (Some(Consonant), None),                            // La
3139        0x0C33 => (Some(Consonant), None),                            // Lla
3140        0x0C34 => (Some(Consonant), None),                            // Llla
3141        0x0C35 => (Some(Consonant), None),                            // Va
3142        0x0C36 => (Some(Consonant), None),                            // Sha
3143        0x0C37 => (Some(Consonant), None),                            // Ssa
3144        0x0C38 => (Some(Consonant), None),                            // Sa
3145        0x0C39 => (Some(Consonant), None),                            // Ha
3146        0x0C3A => (None, None),                                       // unassigned
3147        0x0C3B => (None, None),                                       // unassigned
3148        0x0C3C => (Some(Nukta), Some(BottomPosition)),                // Nukta
3149        0x0C3D => (Some(Avagraha), None),                             // Avagraha
3150        0x0C3E => (Some(VowelDependent), Some(TopPosition)),          // Sign Aa
3151        0x0C3F => (Some(VowelDependent), Some(TopPosition)),          // Sign I
3152        0x0C40 => (Some(VowelDependent), Some(TopPosition)),          // Sign Ii
3153        0x0C41 => (Some(VowelDependent), Some(RightPosition)),        // Sign U
3154        0x0C42 => (Some(VowelDependent), Some(RightPosition)),        // Sign Uu
3155        0x0C43 => (Some(VowelDependent), Some(RightPosition)),        // Sign Vocalic R
3156        0x0C44 => (Some(VowelDependent), Some(RightPosition)),        // Sign Vocalic Rr
3157        0x0C45 => (None, None),                                       // unassigned
3158        0x0C46 => (Some(VowelDependent), Some(TopPosition)),          // Sign E
3159        0x0C47 => (Some(VowelDependent), Some(TopPosition)),          // Sign Ee
3160        0x0C48 => (Some(VowelDependent), Some(TopAndBottomPosition)), // Sign Ai
3161        0x0C49 => (None, None),                                       // unassigned
3162        0x0C4A => (Some(VowelDependent), Some(TopPosition)),          // Sign O
3163        0x0C4B => (Some(VowelDependent), Some(TopPosition)),          // Sign Oo
3164        0x0C4C => (Some(VowelDependent), Some(TopPosition)),          // Sign Au
3165        0x0C4D => (Some(Virama), Some(TopPosition)),                  // Virama
3166        0x0C4E => (None, None),                                       // unassigned
3167        0x0C4F => (None, None),                                       // unassigned
3168        0x0C50 => (None, None),                                       // unassigned
3169        0x0C51 => (None, None),                                       // unassigned
3170        0x0C52 => (None, None),                                       // unassigned
3171        0x0C53 => (None, None),                                       // unassigned
3172        0x0C54 => (None, None),                                       // unassigned
3173        0x0C55 => (Some(VowelDependent), Some(TopPosition)),          // Length Mark
3174        0x0C56 => (Some(VowelDependent), Some(BottomPosition)),       // Ai Length Mark
3175        0x0C57 => (None, None),                                       // unassigned
3176        0x0C58 => (Some(Consonant), None),                            // Tsa
3177        0x0C59 => (Some(Consonant), None),                            // Dza
3178        0x0C5A => (Some(Consonant), None),                            // Rrra
3179        0x0C5B => (None, None),                                       // unassigned
3180        0x0C5C => (None, None),                                       // unassigned
3181        0x0C5D => (Some(ConsonantDead), None),                        // Nakaara Pollu
3182        0x0C5E => (None, None),                                       // unassigned
3183        0x0C5F => (None, None),                                       // unassigned
3184        0x0C60 => (Some(VowelIndependent), None),                     // Vocalic Rr
3185        0x0C61 => (Some(VowelIndependent), None),                     // Vocalic Ll
3186        0x0C62 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic L
3187        0x0C63 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic Ll
3188        0x0C64 => (None, None),                                       // unassigned
3189        0x0C65 => (None, None),                                       // unassigned
3190        0x0C66 => (Some(Number), None),                               // Digit Zero
3191        0x0C67 => (Some(Number), None),                               // Digit One
3192        0x0C68 => (Some(Number), None),                               // Digit Two
3193        0x0C69 => (Some(Number), None),                               // Digit Three
3194        0x0C6A => (Some(Number), None),                               // Digit Four
3195        0x0C6B => (Some(Number), None),                               // Digit Five
3196        0x0C6C => (Some(Number), None),                               // Digit Six
3197        0x0C6D => (Some(Number), None),                               // Digit Seven
3198        0x0C6E => (Some(Number), None),                               // Digit Eight
3199        0x0C6F => (Some(Number), None),                               // Digit Nine
3200        0x0C70 => (None, None),                                       // unassigned
3201        0x0C71 => (None, None),                                       // unassigned
3202        0x0C72 => (None, None),                                       // unassigned
3203        0x0C73 => (None, None),                                       // unassigned
3204        0x0C74 => (None, None),                                       // unassigned
3205        0x0C75 => (None, None),                                       // unassigned
3206        0x0C76 => (None, None),                                       // unassigned
3207        0x0C77 => (None, None),                                       // unassigned
3208        0x0C78 => (Some(Number), None),                               // Fraction Zero Odd P
3209        0x0C79 => (Some(Number), None),                               // Fraction One Odd P
3210        0x0C7A => (Some(Number), None),                               // Fraction Two Odd P
3211        0x0C7B => (Some(Number), None),                               // Fraction Three Odd P
3212        0x0C7C => (Some(Number), None),                               // Fraction One Even P
3213        0x0C7D => (Some(Number), None),                               // Fraction Two Even P
3214        0x0C7E => (Some(Number), None),                               // Fraction Three Even P
3215        0x0C7F => (Some(Symbol), None),                               // Tuumu
3216
3217        // Kannada character table
3218        0x0C80 => (None, None),                                      // Spacing Candrabindu
3219        0x0C81 => (Some(Bindu), Some(TopPosition)),                  // Candrabindu
3220        0x0C82 => (Some(Bindu), Some(RightPosition)),                // Anusvara
3221        0x0C83 => (Some(Visarga), Some(RightPosition)),              // Visarga
3222        0x0C84 => (None, None),                                      // Siddham
3223        0x0C85 => (Some(VowelIndependent), None),                    // A
3224        0x0C86 => (Some(VowelIndependent), None),                    // Aa
3225        0x0C87 => (Some(VowelIndependent), None),                    // I
3226        0x0C88 => (Some(VowelIndependent), None),                    // Ii
3227        0x0C89 => (Some(VowelIndependent), None),                    // U
3228        0x0C8A => (Some(VowelIndependent), None),                    // Uu
3229        0x0C8B => (Some(VowelIndependent), None),                    // Vocalic R
3230        0x0C8C => (Some(VowelIndependent), None),                    // Vocalic L
3231        0x0C8D => (None, None),                                      // unassigned
3232        0x0C8E => (Some(VowelIndependent), None),                    // E
3233        0x0C8F => (Some(VowelIndependent), None),                    // Ee
3234        0x0C90 => (Some(VowelIndependent), None),                    // Ai
3235        0x0C91 => (None, None),                                      // unassigned
3236        0x0C92 => (Some(VowelIndependent), None),                    // O
3237        0x0C93 => (Some(VowelIndependent), None),                    // Oo
3238        0x0C94 => (Some(VowelIndependent), None),                    // Au
3239        0x0C95 => (Some(Consonant), None),                           // Ka
3240        0x0C96 => (Some(Consonant), None),                           // Kha
3241        0x0C97 => (Some(Consonant), None),                           // Ga
3242        0x0C98 => (Some(Consonant), None),                           // Gha
3243        0x0C99 => (Some(Consonant), None),                           // Nga
3244        0x0C9A => (Some(Consonant), None),                           // Ca
3245        0x0C9B => (Some(Consonant), None),                           // Cha
3246        0x0C9C => (Some(Consonant), None),                           // Ja
3247        0x0C9D => (Some(Consonant), None),                           // Jha
3248        0x0C9E => (Some(Consonant), None),                           // Nya
3249        0x0C9F => (Some(Consonant), None),                           // Tta
3250        0x0CA0 => (Some(Consonant), None),                           // Ttha
3251        0x0CA1 => (Some(Consonant), None),                           // Dda
3252        0x0CA2 => (Some(Consonant), None),                           // Ddha
3253        0x0CA3 => (Some(Consonant), None),                           // Nna
3254        0x0CA4 => (Some(Consonant), None),                           // Ta
3255        0x0CA5 => (Some(Consonant), None),                           // Tha
3256        0x0CA6 => (Some(Consonant), None),                           // Da
3257        0x0CA7 => (Some(Consonant), None),                           // Dha
3258        0x0CA8 => (Some(Consonant), None),                           // Na
3259        0x0CA9 => (None, None),                                      // unassigned
3260        0x0CAA => (Some(Consonant), None),                           // Pa
3261        0x0CAB => (Some(Consonant), None),                           // Pha
3262        0x0CAC => (Some(Consonant), None),                           // Ba
3263        0x0CAD => (Some(Consonant), None),                           // Bha
3264        0x0CAE => (Some(Consonant), None),                           // Ma
3265        0x0CAF => (Some(Consonant), None),                           // Ya
3266        0x0CB0 => (Some(Consonant), None),                           // Ra
3267        0x0CB1 => (Some(Consonant), None),                           // Rra
3268        0x0CB2 => (Some(Consonant), None),                           // La
3269        0x0CB3 => (Some(Consonant), None),                           // Lla
3270        0x0CB4 => (None, None),                                      // unassigned
3271        0x0CB5 => (Some(Consonant), None),                           // Va
3272        0x0CB6 => (Some(Consonant), None),                           // Sha
3273        0x0CB7 => (Some(Consonant), None),                           // Ssa
3274        0x0CB8 => (Some(Consonant), None),                           // Sa
3275        0x0CB9 => (Some(Consonant), None),                           // Ha
3276        0x0CBA => (None, None),                                      // unassigned
3277        0x0CBB => (None, None),                                      // unassigned
3278        0x0CBC => (Some(Nukta), Some(BottomPosition)),               // Nukta
3279        0x0CBD => (Some(Avagraha), None),                            // Avagraha
3280        0x0CBE => (Some(VowelDependent), Some(RightPosition)),       // Sign Aa
3281        0x0CBF => (Some(VowelDependent), Some(TopPosition)),         // Sign I
3282        0x0CC0 => (Some(VowelDependent), Some(TopAndRightPosition)), // Sign Ii
3283        0x0CC1 => (Some(VowelDependent), Some(RightPosition)),       // Sign U
3284        0x0CC2 => (Some(VowelDependent), Some(RightPosition)),       // Sign Uu
3285        0x0CC3 => (Some(VowelDependent), Some(RightPosition)),       // Sign Vocalic R
3286        0x0CC4 => (Some(VowelDependent), Some(RightPosition)),       // Sign Vocalic Rr
3287        0x0CC5 => (None, None),                                      // unassigned
3288        0x0CC6 => (Some(VowelDependent), Some(TopPosition)),         // Sign E
3289        0x0CC7 => (Some(VowelDependent), Some(TopAndRightPosition)), // Sign Ee
3290        0x0CC8 => (Some(VowelDependent), Some(TopAndRightPosition)), // Sign Ai
3291        0x0CC9 => (None, None),                                      // unassigned
3292        0x0CCA => (Some(VowelDependent), Some(TopAndRightPosition)), // Sign O
3293        0x0CCB => (Some(VowelDependent), Some(TopAndRightPosition)), // Sign Oo
3294        0x0CCC => (Some(VowelDependent), Some(TopPosition)),         // Sign Au
3295        0x0CCD => (Some(Virama), Some(TopPosition)),                 // Virama
3296        0x0CCE => (None, None),                                      // unassigned
3297        0x0CCF => (None, None),                                      // unassigned
3298        0x0CD0 => (None, None),                                      // unassigned
3299        0x0CD1 => (None, None),                                      // unassigned
3300        0x0CD2 => (None, None),                                      // unassigned
3301        0x0CD3 => (None, None),                                      // unassigned
3302        0x0CD4 => (None, None),                                      // unassigned
3303        0x0CD5 => (Some(VowelDependent), Some(RightPosition)),       // Length Mark
3304        0x0CD6 => (Some(VowelDependent), Some(RightPosition)),       // Ai Length Mark
3305        0x0CD7 => (None, None),                                      // unassigned
3306        0x0CD8 => (None, None),                                      // unassigned
3307        0x0CD9 => (None, None),                                      // unassigned
3308        0x0CDA => (None, None),                                      // unassigned
3309        0x0CDB => (None, None),                                      // unassigned
3310        0x0CDC => (None, None),                                      // unassigned
3311        0x0CDD => (Some(ConsonantDead), None),                       // Nakaara Pollu
3312        0x0CDE => (Some(Consonant), None),                           // Fa
3313        0x0CDF => (None, None),                                      // unassigned
3314        0x0CE0 => (Some(VowelIndependent), None),                    // Vocalic Rr
3315        0x0CE1 => (Some(VowelIndependent), None),                    // Vocalic Ll
3316        0x0CE2 => (Some(VowelDependent), Some(BottomPosition)),      // Sign Vocalic L
3317        0x0CE3 => (Some(VowelDependent), Some(BottomPosition)),      // Sign Vocalic Ll
3318        0x0CE4 => (None, None),                                      // unassigned
3319        0x0CE5 => (None, None),                                      // unassigned
3320        0x0CE6 => (Some(Number), None),                              // Digit Zero
3321        0x0CE7 => (Some(Number), None),                              // Digit One
3322        0x0CE8 => (Some(Number), None),                              // Digit Two
3323        0x0CE9 => (Some(Number), None),                              // Digit Three
3324        0x0CEA => (Some(Number), None),                              // Digit Four
3325        0x0CEB => (Some(Number), None),                              // Digit Five
3326        0x0CEC => (Some(Number), None),                              // Digit Six
3327        0x0CED => (Some(Number), None),                              // Digit Seven
3328        0x0CEE => (Some(Number), None),                              // Digit Eight
3329        0x0CEF => (Some(Number), None),                              // Digit Nine
3330        0x0CF0 => (None, None),                                      // unassigned
3331        0x0CF1 => (Some(ConsonantWithStacker), None),                // Jihvamuliya
3332        0x0CF2 => (Some(ConsonantWithStacker), None),                // Upadhmaniya
3333
3334        // Malayalam character table
3335        0x0D00 => (Some(Bindu), Some(TopPosition)),                   // Combining Anusvara Above
3336        0x0D01 => (Some(Bindu), Some(TopPosition)),                   // Candrabindu
3337        0x0D02 => (Some(Bindu), Some(RightPosition)),                 // Anusvara
3338        0x0D03 => (Some(Visarga), Some(RightPosition)),               // Visarga
3339        0x0D04 => (None, None),                                       // unassigned
3340        0x0D05 => (Some(VowelIndependent), None),                     // A
3341        0x0D06 => (Some(VowelIndependent), None),                     // Aa
3342        0x0D07 => (Some(VowelIndependent), None),                     // I
3343        0x0D08 => (Some(VowelIndependent), None),                     // Ii
3344        0x0D09 => (Some(VowelIndependent), None),                     // U
3345        0x0D0A => (Some(VowelIndependent), None),                     // Uu
3346        0x0D0B => (Some(VowelIndependent), None),                     // Vocalic R
3347        0x0D0C => (Some(VowelIndependent), None),                     // Vocalic L
3348        0x0D0D => (None, None),                                       // unassigned
3349        0x0D0E => (Some(VowelIndependent), None),                     // E
3350        0x0D0F => (Some(VowelIndependent), None),                     // Ee
3351        0x0D10 => (Some(VowelIndependent), None),                     // Ai
3352        0x0D11 => (None, None),                                       // unassigned
3353        0x0D12 => (Some(VowelIndependent), None),                     // O
3354        0x0D13 => (Some(VowelIndependent), None),                     // Oo
3355        0x0D14 => (Some(VowelIndependent), None),                     // Au
3356        0x0D15 => (Some(Consonant), None),                            // Ka
3357        0x0D16 => (Some(Consonant), None),                            // Kha
3358        0x0D17 => (Some(Consonant), None),                            // Ga
3359        0x0D18 => (Some(Consonant), None),                            // Gha
3360        0x0D19 => (Some(Consonant), None),                            // Nga
3361        0x0D1A => (Some(Consonant), None),                            // Ca
3362        0x0D1B => (Some(Consonant), None),                            // Cha
3363        0x0D1C => (Some(Consonant), None),                            // Ja
3364        0x0D1D => (Some(Consonant), None),                            // Jha
3365        0x0D1E => (Some(Consonant), None),                            // Nya
3366        0x0D1F => (Some(Consonant), None),                            // Tta
3367        0x0D20 => (Some(Consonant), None),                            // Ttha
3368        0x0D21 => (Some(Consonant), None),                            // Dda
3369        0x0D22 => (Some(Consonant), None),                            // Ddha
3370        0x0D23 => (Some(Consonant), None),                            // Nna
3371        0x0D24 => (Some(Consonant), None),                            // Ta
3372        0x0D25 => (Some(Consonant), None),                            // Tha
3373        0x0D26 => (Some(Consonant), None),                            // Da
3374        0x0D27 => (Some(Consonant), None),                            // Dha
3375        0x0D28 => (Some(Consonant), None),                            // Na
3376        0x0D29 => (Some(Consonant), None),                            // Nnna
3377        0x0D2A => (Some(Consonant), None),                            // Pa
3378        0x0D2B => (Some(Consonant), None),                            // Pha
3379        0x0D2C => (Some(Consonant), None),                            // Ba
3380        0x0D2D => (Some(Consonant), None),                            // Bha
3381        0x0D2E => (Some(Consonant), None),                            // Ma
3382        0x0D2F => (Some(Consonant), None),                            // Ya
3383        0x0D30 => (Some(Consonant), None),                            // Ra
3384        0x0D31 => (Some(Consonant), None),                            // Rra
3385        0x0D32 => (Some(Consonant), None),                            // La
3386        0x0D33 => (Some(Consonant), None),                            // Lla
3387        0x0D34 => (Some(Consonant), None),                            // Llla
3388        0x0D35 => (Some(Consonant), None),                            // Va
3389        0x0D36 => (Some(Consonant), None),                            // Sha
3390        0x0D37 => (Some(Consonant), None),                            // Ssa
3391        0x0D38 => (Some(Consonant), None),                            // Sa
3392        0x0D39 => (Some(Consonant), None),                            // Ha
3393        0x0D3A => (Some(Consonant), None),                            // Ttta
3394        0x0D3B => (Some(PureKiller), Some(TopPosition)),              // Vertical Bar Virama
3395        0x0D3C => (Some(PureKiller), Some(TopPosition)),              // Circular Virama
3396        0x0D3D => (Some(Avagraha), None),                             // Avagraha
3397        0x0D3E => (Some(VowelDependent), Some(RightPosition)),        // Sign Aa
3398        0x0D3F => (Some(VowelDependent), Some(RightPosition)),        // Sign I
3399        0x0D40 => (Some(VowelDependent), Some(RightPosition)),        // Sign Ii
3400        0x0D41 => (Some(VowelDependent), Some(RightPosition)),        // Sign U
3401        0x0D42 => (Some(VowelDependent), Some(RightPosition)),        // Sign Uu
3402        0x0D43 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic R
3403        0x0D44 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic Rr
3404        0x0D45 => (None, None),                                       // unassigned
3405        0x0D46 => (Some(VowelDependent), Some(LeftPosition)),         // Sign E
3406        0x0D47 => (Some(VowelDependent), Some(LeftPosition)),         // Sign Ee
3407        0x0D48 => (Some(VowelDependent), Some(LeftPosition)),         // Sign Ai
3408        0x0D49 => (None, None),                                       // unassigned
3409        0x0D4A => (Some(VowelDependent), Some(LeftAndRightPosition)), // Sign O
3410        0x0D4B => (Some(VowelDependent), Some(LeftAndRightPosition)), // Sign Oo
3411        0x0D4C => (Some(VowelDependent), Some(LeftAndRightPosition)), // Sign Au
3412        0x0D4D => (Some(Virama), Some(TopPosition)),                  // Virama
3413        0x0D4E => (Some(ConsonantPreRepha), None),                    // Dot Reph
3414        0x0D4F => (Some(Symbol), None),                               // Para
3415        0x0D50 => (None, None),                                       // unassigned
3416        0x0D51 => (None, None),                                       // unassigned
3417        0x0D52 => (None, None),                                       // unassigned
3418        0x0D53 => (None, None),                                       // unassigned
3419        0x0D54 => (Some(ConsonantDead), None),                        // Chillu M
3420        0x0D55 => (Some(ConsonantDead), None),                        // Chillu Y
3421        0x0D56 => (Some(ConsonantDead), None),                        // Chillu Lll
3422        0x0D57 => (Some(VowelDependent), Some(RightPosition)),        // Au Length Mark
3423        0x0D58 => (Some(Number), None),                               // Fraction 1/160
3424        0x0D59 => (Some(Number), None),                               // Fraction 1/40
3425        0x0D5A => (Some(Number), None),                               // Fraction 3/80
3426        0x0D5B => (Some(Number), None),                               // Fraction 1/20
3427        0x0D5C => (Some(Number), None),                               // Fraction 1/10
3428        0x0D5D => (Some(Number), None),                               // Fraction 3/20
3429        0x0D5E => (Some(Number), None),                               // Fraction 1/5
3430        0x0D5F => (Some(VowelIndependent), None),                     // Archaic Ii
3431        0x0D60 => (Some(VowelIndependent), None),                     // Vocalic Rr
3432        0x0D61 => (Some(VowelIndependent), None),                     // Vocalic Ll
3433        0x0D62 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic L
3434        0x0D63 => (Some(VowelDependent), Some(BottomPosition)),       // Sign Vocalic Ll
3435        0x0D64 => (None, None),                                       // unassigned
3436        0x0D65 => (None, None),                                       // unassigned
3437        0x0D66 => (Some(Number), None),                               // Digit Zero
3438        0x0D67 => (Some(Number), None),                               // Digit One
3439        0x0D68 => (Some(Number), None),                               // Digit Two
3440        0x0D69 => (Some(Number), None),                               // Digit Three
3441        0x0D6A => (Some(Number), None),                               // Digit Four
3442        0x0D6B => (Some(Number), None),                               // Digit Five
3443        0x0D6C => (Some(Number), None),                               // Digit Six
3444        0x0D6D => (Some(Number), None),                               // Digit Seven
3445        0x0D6E => (Some(Number), None),                               // Digit Eight
3446        0x0D6F => (Some(Number), None),                               // Digit Nine
3447        0x0D70 => (Some(Number), None),                               // Number Ten
3448        0x0D71 => (Some(Number), None),                               // Number One Hundred
3449        0x0D72 => (Some(Number), None),                               // Number One Thousand
3450        0x0D73 => (Some(Number), None),                               // Fraction 1/4
3451        0x0D74 => (Some(Number), None),                               // Fraction 1/2
3452        0x0D75 => (Some(Number), None),                               // Fraction 3/4
3453        0x0D76 => (Some(Number), None),                               // Fraction 1/16
3454        0x0D77 => (Some(Number), None),                               // Fraction 1/8
3455        0x0D78 => (Some(Number), None),                               // Fraction 3/16
3456        0x0D79 => (Some(Symbol), None),                               // Date Mark
3457        0x0D7A => (Some(ConsonantDead), None),                        // Chillu Nn
3458        0x0D7B => (Some(ConsonantDead), None),                        // Chillu N
3459        0x0D7C => (Some(ConsonantDead), None),                        // Chillu Rr
3460        0x0D7D => (Some(ConsonantDead), None),                        // Chillu L
3461        0x0D7E => (Some(ConsonantDead), None),                        // Chillu Ll
3462        0x0D7F => (Some(ConsonantDead), None),                        // Chillu K
3463
3464        // Sinhala character table
3465        0x0D80 => (None, None),                                          // unassigned
3466        0x0D81 => (None, None),                                          // unassigned
3467        0x0D82 => (Some(Bindu), Some(RightPosition)),                    // Anusvara
3468        0x0D83 => (Some(Visarga), Some(RightPosition)),                  // Visarga
3469        0x0D84 => (None, None),                                          // unassigned
3470        0x0D85 => (Some(VowelIndependent), None),                        // A
3471        0x0D86 => (Some(VowelIndependent), None),                        // Aa
3472        0x0D87 => (Some(VowelIndependent), None),                        // Ae
3473        0x0D88 => (Some(VowelIndependent), None),                        // Aae
3474        0x0D89 => (Some(VowelIndependent), None),                        // I
3475        0x0D8A => (Some(VowelIndependent), None),                        // Ii
3476        0x0D8B => (Some(VowelIndependent), None),                        // U
3477        0x0D8C => (Some(VowelIndependent), None),                        // Uu
3478        0x0D8D => (Some(VowelIndependent), None),                        // Vocalic R
3479        0x0D8E => (Some(VowelIndependent), None),                        // Vocalic Rr
3480        0x0D8F => (Some(VowelIndependent), None),                        // Vocalic L
3481        0x0D90 => (Some(VowelIndependent), None),                        // Vocalic Ll
3482        0x0D91 => (Some(VowelIndependent), None),                        // E
3483        0x0D92 => (Some(VowelIndependent), None),                        // Ee
3484        0x0D93 => (Some(VowelIndependent), None),                        // Ai
3485        0x0D94 => (Some(VowelIndependent), None),                        // O
3486        0x0D95 => (Some(VowelIndependent), None),                        // Oo
3487        0x0D96 => (Some(VowelIndependent), None),                        // Au
3488        0x0D97 => (None, None),                                          // unassigned
3489        0x0D98 => (None, None),                                          // unassigned
3490        0x0D99 => (None, None),                                          // unassigned
3491        0x0D9A => (Some(Consonant), None),                               // Ka
3492        0x0D9B => (Some(Consonant), None),                               // Kha
3493        0x0D9C => (Some(Consonant), None),                               // Ga
3494        0x0D9D => (Some(Consonant), None),                               // Gha
3495        0x0D9E => (Some(Consonant), None),                               // Nga
3496        0x0D9F => (Some(Consonant), None),                               // Nnga
3497        0x0DA0 => (Some(Consonant), None),                               // Ca
3498        0x0DA1 => (Some(Consonant), None),                               // Cha
3499        0x0DA2 => (Some(Consonant), None),                               // Ja
3500        0x0DA3 => (Some(Consonant), None),                               // Jha
3501        0x0DA4 => (Some(Consonant), None),                               // Nya
3502        0x0DA5 => (Some(Consonant), None),                               // Jnya
3503        0x0DA6 => (Some(Consonant), None),                               // Nyja
3504        0x0DA7 => (Some(Consonant), None),                               // Tta
3505        0x0DA8 => (Some(Consonant), None),                               // Ttha
3506        0x0DA9 => (Some(Consonant), None),                               // Dda
3507        0x0DAA => (Some(Consonant), None),                               // Ddha
3508        0x0DAB => (Some(Consonant), None),                               // Nna
3509        0x0DAC => (Some(Consonant), None),                               // Nndda
3510        0x0DAD => (Some(Consonant), None),                               // Ta
3511        0x0DAE => (Some(Consonant), None),                               // Tha
3512        0x0DAF => (Some(Consonant), None),                               // Da
3513        0x0DB0 => (Some(Consonant), None),                               // Dha
3514        0x0DB1 => (Some(Consonant), None),                               // Na
3515        0x0DB2 => (None, None),                                          // unassigned
3516        0x0DB3 => (Some(Consonant), None),                               // Nda
3517        0x0DB4 => (Some(Consonant), None),                               // Pa
3518        0x0DB5 => (Some(Consonant), None),                               // Pha
3519        0x0DB6 => (Some(Consonant), None),                               // Ba
3520        0x0DB7 => (Some(Consonant), None),                               // Bha
3521        0x0DB8 => (Some(Consonant), None),                               // Ma
3522        0x0DB9 => (Some(Consonant), None),                               // Mba
3523        0x0DBA => (Some(Consonant), None),                               // Ya
3524        0x0DBB => (Some(Consonant), None),                               // Ra
3525        0x0DBC => (None, None),                                          // unassigned
3526        0x0DBD => (Some(Consonant), None),                               // La
3527        0x0DBE => (None, None),                                          // unassigned
3528        0x0DBF => (None, None),                                          // unassigned
3529        0x0DC0 => (Some(Consonant), None),                               // Va
3530        0x0DC1 => (Some(Consonant), None),                               // Sha
3531        0x0DC2 => (Some(Consonant), None),                               // Ssa
3532        0x0DC3 => (Some(Consonant), None),                               // Sa
3533        0x0DC4 => (Some(Consonant), None),                               // Ha
3534        0x0DC5 => (Some(Consonant), None),                               // Lla
3535        0x0DC6 => (Some(Consonant), None),                               // Fa
3536        0x0DC7 => (None, None),                                          // unassigned
3537        0x0DC8 => (None, None),                                          // unassigned
3538        0x0DC9 => (None, None),                                          // unassigned
3539        0x0DCA => (Some(Virama), Some(TopPosition)),                     // Virama
3540        0x0DCB => (None, None),                                          // unassigned
3541        0x0DCC => (None, None),                                          // unassigned
3542        0x0DCD => (None, None),                                          // unassigned
3543        0x0DCE => (None, None),                                          // unassigned
3544        0x0DCF => (Some(VowelDependent), Some(RightPosition)),           // Sign Aa
3545        0x0DD0 => (Some(VowelDependent), Some(RightPosition)),           // Sign Ae
3546        0x0DD1 => (Some(VowelDependent), Some(RightPosition)),           // Sign Aae
3547        0x0DD2 => (Some(VowelDependent), Some(TopPosition)),             // Sign I
3548        0x0DD3 => (Some(VowelDependent), Some(TopPosition)),             // Sign Ii
3549        0x0DD4 => (Some(VowelDependent), Some(BottomPosition)),          // Sign U
3550        0x0DD5 => (None, None),                                          // unassigned
3551        0x0DD6 => (Some(VowelDependent), Some(BottomPosition)),          // Sign Uu
3552        0x0DD7 => (None, None),                                          // unassigned
3553        0x0DD8 => (Some(VowelDependent), Some(RightPosition)),           // Sign Vocalic R
3554        0x0DD9 => (Some(VowelDependent), Some(LeftPosition)),            // Sign E
3555        0x0DDA => (Some(VowelDependent), Some(TopAndLeftPosition)),      // Sign Ee
3556        0x0DDB => (Some(VowelDependent), Some(LeftPosition)),            // Sign Ai
3557        0x0DDC => (Some(VowelDependent), Some(LeftAndRightPosition)),    // Sign O
3558        0x0DDD => (Some(VowelDependent), Some(TopLeftAndRightPosition)), // Sign Oo
3559        0x0DDE => (Some(VowelDependent), Some(LeftAndRightPosition)),    // Sign Au
3560        0x0DDF => (Some(VowelDependent), Some(RightPosition)),           // Sign Vocalic L
3561        0x0DE0 => (None, None),                                          // unassigned
3562        0x0DE1 => (None, None),                                          // unassigned
3563        0x0DE2 => (None, None),                                          // unassigned
3564        0x0DE3 => (None, None),                                          // unassigned
3565        0x0DE4 => (None, None),                                          // unassigned
3566        0x0DE5 => (None, None),                                          // unassigned
3567        0x0DE6 => (Some(Number), None),                                  // Digit Zero
3568        0x0DE7 => (Some(Number), None),                                  // Digit One
3569        0x0DE8 => (Some(Number), None),                                  // Digit Two
3570        0x0DE9 => (Some(Number), None),                                  // Digit Three
3571        0x0DEA => (Some(Number), None),                                  // Digit Four
3572        0x0DEB => (Some(Number), None),                                  // Digit Five
3573        0x0DEC => (Some(Number), None),                                  // Digit Six
3574        0x0DED => (Some(Number), None),                                  // Digit Seven
3575        0x0DEE => (Some(Number), None),                                  // Digit Eight
3576        0x0DEF => (Some(Number), None),                                  // Digit Nine
3577        0x0DF0 => (None, None),                                          // unassigned
3578        0x0DF1 => (None, None),                                          // unassigned
3579        0x0DF2 => (Some(VowelDependent), Some(RightPosition)),           // Sign Vocalic Rr
3580        0x0DF3 => (Some(VowelDependent), Some(RightPosition)),           // Sign Vocalic Ll
3581        0x0DF4 => (None, None),                                          // Kunddaliya
3582        0x0DF5 => (None, None),                                          // unassigned
3583        0x0DF6 => (None, None),                                          // unassigned
3584        0x0DF7 => (None, None),                                          // unassigned
3585        0x0DF8 => (None, None),                                          // unassigned
3586        0x0DF9 => (None, None),                                          // unassigned
3587        0x0DFA => (None, None),                                          // unassigned
3588        0x0DFB => (None, None),                                          // unassigned
3589        0x0DFC => (None, None),                                          // unassigned
3590        0x0DFD => (None, None),                                          // unassigned
3591        0x0DFE => (None, None),                                          // unassigned
3592        0x0DFF => (None, None),                                          // unassigned
3593
3594        // Vedic Extensions character table
3595        0x1CD0 => (Some(Cantillation), Some(TopPosition)),    // Tone Karshana
3596        0x1CD1 => (Some(Cantillation), Some(TopPosition)),    // Tone Shara
3597        0x1CD2 => (Some(Cantillation), Some(TopPosition)),    // Tone Prenkha
3598        0x1CD3 => (None, None),                               // Sign Nihshvasa
3599        0x1CD4 => (Some(Cantillation), Some(Overstruck)),     // Tone Midline Svarita
3600        0x1CD5 => (Some(Cantillation), Some(BottomPosition)), // Tone Aggravated Independent Svarita
3601        0x1CD6 => (Some(Cantillation), Some(BottomPosition)), // Tone Independent Svarita
3602        0x1CD7 => (Some(Cantillation), Some(BottomPosition)), // Tone Kathaka Independent Svarita
3603        0x1CD8 => (Some(Cantillation), Some(BottomPosition)), // Tone Candra Below
3604        0x1CD9 => (Some(Cantillation), Some(BottomPosition)), // Tone Kathaka Independent Svarita Schroeder
3605        0x1CDA => (Some(Cantillation), Some(TopPosition)),    // Tone Double Svarita
3606        0x1CDB => (Some(Cantillation), Some(TopPosition)),    // Tone Triple Svarita
3607        0x1CDC => (Some(Cantillation), Some(BottomPosition)), // Tone Kathaka Anudatta
3608        0x1CDD => (Some(Cantillation), Some(BottomPosition)), // Tone Dot Below
3609        0x1CDE => (Some(Cantillation), Some(BottomPosition)), // Tone Two Dots Below
3610        0x1CDF => (Some(Cantillation), Some(BottomPosition)), // Tone Three Dots Below
3611        0x1CE0 => (Some(Cantillation), Some(TopPosition)),    // Tone Rigvedic Kashmiri Independent Svarita
3612        0x1CE1 => (Some(Cantillation), Some(RightPosition)),  // Tone Atharavedic Independent Svarita
3613        0x1CE2 => (Some(Avagraha), Some(Overstruck)),         // Sign Visarga Svarita
3614        0x1CE3 => (None, Some(Overstruck)),                   // Sign Visarga Udatta
3615        0x1CE4 => (None, Some(Overstruck)),                   // Sign Reversed Visarga Udatta
3616        0x1CE5 => (None, Some(Overstruck)),                   // Sign Visarga Anudatta
3617        0x1CE6 => (None, Some(Overstruck)),                   // Sign Reversed Visarga Anudatta
3618        0x1CE7 => (None, Some(Overstruck)),                   // Sign Visarga Udatta With Tail
3619        0x1CE8 => (Some(Avagraha), Some(Overstruck)),         // Sign Visarga Anudatta With Tail
3620        0x1CE9 => (Some(Symbol), None),                       // Sign Anusvara Antargomukha
3621        0x1CEA => (None, None),                               // Sign Anusvara Bahirgomukha
3622        0x1CEB => (None, None),                               // Sign Anusvara Vamagomukha
3623        0x1CEC => (Some(Symbol), None),                       // Sign Anusvara Vamagomukha With Tail
3624        0x1CED => (Some(Avagraha), Some(BottomPosition)),     // Sign Tiryak
3625        0x1CEE => (Some(Symbol), None),                       // Sign Hexiform Long Anusvara
3626        0x1CEF => (None, None),                               // Sign Long Anusvara
3627        0x1CF0 => (None, None),                               // Sign Rthang Long Anusvara
3628        0x1CF1 => (Some(Symbol), None),                       // Sign Anusvara Ubhayato Mukha
3629        0x1CF2 => (Some(Visarga), None),                      // Sign Ardhavisarga
3630        0x1CF3 => (Some(Visarga), None),                      // Sign Rotated Ardhavisarga
3631        0x1CF4 => (Some(Cantillation), Some(TopPosition)),    // Tone Candra Above
3632        0x1CF5 => (Some(ConsonantWithStacker), None),         // Sign Jihvamuliya
3633        0x1CF6 => (Some(ConsonantWithStacker), None),         // Sign Upadhmaniya
3634        0x1CF7 => (None, None),                               // Sign Atikrama
3635        0x1CF8 => (Some(Cantillation), None),                 // Tone Ring Above
3636        0x1CF9 => (Some(Cantillation), None),                 // Tone Double Ring Above
3637
3638        // Devanagari Extended character table
3639        0xA8E0 => (Some(Cantillation), Some(TopPosition)),   // Combining Zero
3640        0xA8E1 => (Some(Cantillation), Some(TopPosition)),   // Combining One
3641        0xA8E2 => (Some(Cantillation), Some(TopPosition)),   // Combining Two
3642        0xA8E3 => (Some(Cantillation), Some(TopPosition)),   // Combining Three
3643        0xA8E4 => (Some(Cantillation), Some(TopPosition)),   // Combining Four
3644        0xA8E5 => (Some(Cantillation), Some(TopPosition)),   // Combining Five
3645        0xA8E6 => (Some(Cantillation), Some(TopPosition)),   // Combining Six
3646        0xA8E7 => (Some(Cantillation), Some(TopPosition)),   // Combining Seven
3647        0xA8E8 => (Some(Cantillation), Some(TopPosition)),   // Combining Eight
3648        0xA8E9 => (Some(Cantillation), Some(TopPosition)),   // Combining Nine
3649        0xA8EA => (Some(Cantillation), Some(TopPosition)),   // Combining A
3650        0xA8EB => (Some(Cantillation), Some(TopPosition)),   // Combining U
3651        0xA8EC => (Some(Cantillation), Some(TopPosition)),   // Combining Ka
3652        0xA8ED => (Some(Cantillation), Some(TopPosition)),   // Combining Na
3653        0xA8EE => (Some(Cantillation), Some(TopPosition)),   // Combining Pa
3654        0xA8EF => (Some(Cantillation), Some(TopPosition)),   // Combining Ra
3655        0xA8F0 => (Some(Cantillation), Some(TopPosition)),   // Combining Vi
3656        0xA8F1 => (Some(Cantillation), Some(TopPosition)),   // Combining Avagraha
3657        0xA8F2 => (Some(Bindu), None),                       // Spacing Candrabindu
3658        0xA8F3 => (Some(Bindu), None),                       // Candrabindu Virama
3659        0xA8F4 => (None, None),                              // Double Candrabindu Virama
3660        0xA8F5 => (None, None),                              // Candrabindu Two
3661        0xA8F6 => (None, None),                              // Candrabindu Three
3662        0xA8F7 => (None, None),                              // Candrabindu Avagraha
3663        0xA8F8 => (None, None),                              // Pushpika
3664        0xA8F9 => (None, None),                              // Gap Filler
3665        0xA8FA => (None, None),                              // Caret
3666        0xA8FB => (None, None),                              // Headstroke
3667        0xA8FC => (None, None),                              // Siddham
3668        0xA8FD => (None, None),                              // Jain Om
3669        0xA8FE => (Some(VowelIndependent), None),            // Ay
3670        0xA8FF => (Some(VowelDependent), Some(TopPosition)), // Sign Ay
3671
3672        // Sinhala Archaic Numbers character table
3673        0x111E0 => (None, None),         // unassigned
3674        0x111E1 => (Some(Number), None), // Archaic Digit One
3675        0x111E2 => (Some(Number), None), // Archaic Digit Two
3676        0x111E3 => (Some(Number), None), // Archaic Digit Three
3677        0x111E4 => (Some(Number), None), // Archaic Digit Four
3678        0x111E5 => (Some(Number), None), // Archaic Digit Five
3679        0x111E6 => (Some(Number), None), // Archaic Digit Six
3680        0x111E7 => (Some(Number), None), // Archaic Digit Seven
3681        0x111E8 => (Some(Number), None), // Archaic Digit Eight
3682        0x111E9 => (Some(Number), None), // Archaic Digit Nine
3683        0x111EA => (Some(Number), None), // Archaic Number Ten
3684        0x111EB => (Some(Number), None), // Archaic Number 20
3685        0x111EC => (Some(Number), None), // Archaic Number 30
3686        0x111ED => (Some(Number), None), // Archaic Number 40
3687        0x111EE => (Some(Number), None), // Archaic Number 50
3688        0x111EF => (Some(Number), None), // Archaic Number 60
3689        0x111F0 => (Some(Number), None), // Archaic Number 70
3690        0x111F1 => (Some(Number), None), // Archaic Number 80
3691        0x111F2 => (Some(Number), None), // Archaic Number 90
3692        0x111F3 => (Some(Number), None), // Archaic Number 100
3693        0x111F4 => (Some(Number), None), // Archaic Number 1000
3694        0x111F5 => (None, None),         // unassigned
3695        0x111F6 => (None, None),         // unassigned
3696        0x111F7 => (None, None),         // unassigned
3697        0x111F8 => (None, None),         // unassigned
3698        0x111F9 => (None, None),         // unassigned
3699        0x111FA => (None, None),         // unassigned
3700        0x111FB => (None, None),         // unassigned
3701        0x111FC => (None, None),         // unassigned
3702        0x111FD => (None, None),         // unassigned
3703        0x111FE => (None, None),         // unassigned
3704        0x111FF => (None, None),         // unassigned
3705
3706        // Grantha marks character table
3707        0x11301 => (Some(Bindu), Some(TopPosition)),     // Grantha Candrabindu
3708        0x11303 => (Some(Visarga), Some(RightPosition)), // Grantha Visarga
3709        0x1133B => (Some(Nukta), Some(BottomPosition)),  // Combining Bindu Below
3710        0x1133C => (Some(Nukta), Some(BottomPosition)),  // Grantha Nukta
3711
3712        // Miscellaneous character table
3713        0x00A0 => (Some(Placeholder), None),      // No-break space
3714        0x00B2 => (Some(SyllableModifier), None), // Superscript Two (used in Tamil)
3715        0x00B3 => (Some(SyllableModifier), None), // Superscript Three (used in Tamil)
3716        0x200C => (Some(NonJoiner), None),        // Zero-width non-joiner
3717        0x200D => (Some(Joiner), None),           // Zero-width joiner
3718        0x2010 => (Some(Placeholder), None),      // Hyphen
3719        0x2011 => (Some(Placeholder), None),      // No-break hyphen
3720        0x2012 => (Some(Placeholder), None),      // Figure dash
3721        0x2013 => (Some(Placeholder), None),      // En dash
3722        0x2014 => (Some(Placeholder), None),      // Em dash
3723        0x2074 => (Some(SyllableModifier), None), // Superscript Four (used in Tamil)
3724        0x2082 => (Some(SyllableModifier), None), // Subscript Two (used in Tamil)
3725        0x2083 => (Some(SyllableModifier), None), // Subscript Three (used in Tamil)
3726        0x2084 => (Some(SyllableModifier), None), // Subscript Four (used in Tamil)
3727        0x25CC => (Some(DottedCircle), None),     // Dotted circle
3728
3729        _ => (None, None),
3730    }
3731}
3732
3733/////////////////////////////////////////////////////////////////////////////
3734// Unit tests
3735/////////////////////////////////////////////////////////////////////////////
3736
3737#[cfg(test)]
3738mod tests {
3739    use super::*;
3740
3741    mod matra_pos {
3742        use super::*;
3743
3744        #[test]
3745        fn test_no_canonical_decomposition_matra() {
3746            assert_eq!(
3747                matra_pos('\u{0AC9}', Script::Gujarati),
3748                Some(Pos::AfterPost)
3749            );
3750            assert_eq!(matra_pos('\u{0B57}', Script::Oriya), Some(Pos::AfterPost));
3751        }
3752
3753        #[test]
3754        fn test_non_decomposed_matra() {
3755            // Should never happen
3756            assert_eq!(matra_pos('\u{09CB}', Script::Bengali), None);
3757        }
3758
3759        #[test]
3760        fn test_non_matra() {
3761            assert_eq!(matra_pos('\u{09B6}', Script::Bengali), None);
3762        }
3763    }
3764
3765    mod move_element {
3766        use super::*;
3767
3768        #[test]
3769        fn test_move_forward() {
3770            let mut v = [1, 2, 3, 4];
3771            move_element(&mut v, 0, 3);
3772
3773            assert_eq!([2, 3, 4, 1], v);
3774        }
3775
3776        #[test]
3777        fn test_move_backward() {
3778            let mut v = [1, 2, 3, 4];
3779            move_element(&mut v, 3, 1);
3780
3781            assert_eq!([1, 4, 2, 3], v);
3782        }
3783    }
3784
3785    mod constrain_vowel {
3786        use super::*;
3787
3788        #[test]
3789        fn test_insert_one_dotted_circle() {
3790            let mut cs = vec!['\u{0909}', '\u{0941}'];
3791            constrain_vowel(&mut cs);
3792
3793            assert_eq!(vec!['\u{0909}', '\u{25CC}', '\u{0941}'], cs);
3794        }
3795
3796        #[test]
3797        fn test_insert_two_dotted_circles() {
3798            let mut cs = vec!['\u{0909}', '\u{0941}', '\u{090F}', '\u{0945}'];
3799            constrain_vowel(&mut cs);
3800
3801            assert_eq!(
3802                vec!['\u{0909}', '\u{25CC}', '\u{0941}', '\u{090F}', '\u{25CC}', '\u{0945}'],
3803                cs
3804            );
3805        }
3806
3807        #[test]
3808        fn test_insert_dotted_circle_after_reph() {
3809            let mut cs = vec!['\u{0930}', '\u{094D}', '\u{0907}'];
3810            constrain_vowel(&mut cs);
3811
3812            assert_eq!(vec!['\u{0930}', '\u{094D}', '\u{25CC}', '\u{0907}'], cs);
3813        }
3814
3815        #[test]
3816        fn test_should_not_insert_dotted_circle() {
3817            let mut cs = vec!['\u{0930}', '\u{094D}'];
3818            constrain_vowel(&mut cs);
3819
3820            assert_eq!(vec!['\u{0930}', '\u{094D}'], cs);
3821        }
3822    }
3823
3824    mod decompose_matra {
3825        use super::*;
3826
3827        #[test]
3828        fn test_single_decomposition() {
3829            let mut cs = vec!['\u{09CB}'];
3830            decompose_matra(&mut cs);
3831
3832            assert_eq!(vec!['\u{09C7}', '\u{09BE}'], cs);
3833        }
3834
3835        #[test]
3836        fn test_double_decomposition() {
3837            let mut cs = vec!['\u{09CB}', '\u{09CB}'];
3838            decompose_matra(&mut cs);
3839
3840            assert_eq!(vec!['\u{09C7}', '\u{09BE}', '\u{09C7}', '\u{09BE}'], cs);
3841        }
3842    }
3843
3844    mod recompose_bengali_ya_nukta {
3845        use super::*;
3846
3847        #[test]
3848        fn test_single_codepoint() {
3849            let mut cs = vec!['\u{09AF}'];
3850            recompose_bengali_ya_nukta(&mut cs);
3851
3852            assert_eq!(vec!['\u{09AF}'], cs);
3853        }
3854
3855        #[test]
3856        fn test_ya_nukta_ya() {
3857            let mut cs = vec!['\u{09AF}', '\u{09BC}', '\u{09AF}'];
3858            recompose_bengali_ya_nukta(&mut cs);
3859
3860            assert_eq!(vec!['\u{09DF}', '\u{09AF}'], cs);
3861        }
3862
3863        #[test]
3864        fn test_ya_ya_nukta() {
3865            let mut cs = vec!['\u{09AF}', '\u{09AF}', '\u{09BC}'];
3866            recompose_bengali_ya_nukta(&mut cs);
3867
3868            assert_eq!(vec!['\u{09AF}', '\u{09DF}'], cs);
3869        }
3870    }
3871
3872    mod reorder_kannada_ra_halant_zwj {
3873        use super::*;
3874
3875        const R: char = '\u{0CB0}';
3876        const H: char = '\u{0CCD}';
3877        const Z: char = '\u{200D}';
3878
3879        #[test]
3880        fn test_ra_halant() {
3881            let mut cs = vec![R, H];
3882            reorder_kannada_ra_halant_zwj(&mut cs);
3883
3884            assert_eq!(vec![R, H], cs);
3885        }
3886
3887        #[test]
3888        fn test_ra_halant_zwj() {
3889            let mut cs = vec![R, H, Z];
3890            reorder_kannada_ra_halant_zwj(&mut cs);
3891
3892            assert_eq!(vec![R, Z, H], cs);
3893        }
3894
3895        #[test]
3896        fn test_non_initial_ra_halant_zwj() {
3897            let mut cs = vec![R, H, R, H, Z];
3898            reorder_kannada_ra_halant_zwj(&mut cs);
3899
3900            assert_eq!(vec![R, H, R, H, Z], cs);
3901        }
3902    }
3903}