ucd/
lib.rs

1#![no_std]
2use core::char;
3use core::slice;
4
5pub mod tables;
6use tables::Search;
7pub use tables::{
8    BidiClass,
9    BidiPairedBracketType,
10    DecompositionType,
11    EastAsianWidth,
12    GraphemeClusterBreak,
13    HangulSyllableType,
14    IndicPositionalCategory,
15    IndicSyllabicCategory,
16    JoiningGroup,
17    JoiningType,
18    LinebreakClass,
19    NumericType,
20    Script,
21    SentenceBreak,
22    Trilean,
23    UnicodeBlock,
24    UnicodeCategory,
25    WordBreak
26};
27
/// Numeric value of a code point, as returned by `Codepoint::numeric_value`.
///
/// A value is either a whole number or a fraction kept as a
/// numerator/denominator pair.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Number {
    /// A whole number.
    Integer(i64),
    /// A fraction: numerator first, denominator second.
    Rational(i32, u32),
}
34
/// Reassembles a code point stored as three bytes into a `char`.
///
/// The first byte carries bits 16 and up, the second bits 8..16 and the
/// third the low eight bits of the scalar value.
fn cp_decode((hi, mid, lo): (u8, u8, u8)) -> char {
    let scalar = (u32::from(hi) << 16) | (u32::from(mid) << 8) | u32::from(lo);
    // SAFETY: nothing is validated here. The caller must pass a triple that
    // encodes a valid Unicode scalar value -- presumably always the case for
    // the generated tables; verify against the table generator.
    unsafe { char::from_u32_unchecked(scalar) }
}
39
/// Private state behind `CharIter`.
enum CharIterInternal {
    /// Remaining entries of a static table slice of packed code points.
    Iterator(slice::Iter<'static, (u8, u8, u8)>),
    /// Two characters still to be yielded, in this order.
    Double(char, char),
    /// One character still to be yielded.
    Single(char),
    /// Nothing left to yield.
    Exhausted,
}

/// Iterator over the characters of a mapping result.
pub struct CharIter(CharIterInternal);

impl CharIter {
    /// Creates an iterator over the table slice `osl` when one is given;
    /// without a slice the iterator yields only `cp`.
    pub fn new(osl: Option<&'static [(u8, u8, u8)]>, cp: char) -> CharIter {
        let state = osl.map_or(CharIterInternal::Single(cp), |entries| {
            CharIterInternal::Iterator(entries.iter())
        });
        CharIter(state)
    }

    /// Creates an iterator that yields `a` and then `b`.
    pub fn hangul(a: char, b: char) -> CharIter {
        let state = CharIterInternal::Double(a, b);
        CharIter(state)
    }
}
61
62impl Iterator for CharIter {
63    type Item = char;
64
65    fn next(&mut self) -> Option<char> {
66        match self.0 {
67            CharIterInternal::Iterator(ref mut it) => it.next().map(|c| cp_decode(*c)),
68            CharIterInternal::Double(a, b) => {
69                self.0 = CharIterInternal::Single(b);
70                Some(a)
71            },
72            CharIterInternal::Single(c) => {
73                self.0 = CharIterInternal::Exhausted;
74                Some(c)
75            },
76            CharIterInternal::Exhausted => None
77        }
78    }
79
80    fn size_hint(&self) -> (usize, Option<usize>) {
81        match self.0 {
82            CharIterInternal::Iterator(ref it) => it.size_hint(),
83            CharIterInternal::Double(_, _) => (2, Some(2)),
84            CharIterInternal::Single(_) => (1, Some(1)),
85            CharIterInternal::Exhausted => (0, Some(0))
86        }
87    }
88}
89
90pub trait Codepoint where Self: core::marker::Sized {
91    // general
92    fn age(self) -> Option<(u8,u8)>;
93    fn block(self) -> Option<UnicodeBlock>;
94    fn category(self) -> UnicodeCategory;
95    fn codepoint(self) -> char;
96    fn iso_comment(self) -> &'static str;
97
98    // function and appearance
99    fn is_alphabetic(self) -> bool;
100    fn is_alphabetic_other(self) -> bool;
101    fn is_dash(self) -> bool;
102    fn is_default_ignorable(self) -> bool;
103    fn is_default_ignorable_other(self) -> bool;
104    fn is_deprecated(self) -> bool;
105    fn is_diacritic(self) -> bool;
106    fn is_extender(self) -> bool;
107    fn is_hex_digit(self) -> bool;
108    fn is_hex_digit_ascii(self) -> bool;
109    fn is_hyphen(self) -> bool;
110    fn is_logical_order_exception(self) -> bool;
111    fn is_math(self) -> bool;
112    fn is_math_other(self) -> bool;
113    fn is_noncharacter(self) -> bool;
114    fn is_preprended_concatenation_mark(self) -> bool;
115    fn is_quotation_mark(self) -> bool;
116    fn is_sentence_terminal(self) -> bool;
117    fn is_soft_dotted(self) -> bool;
118    fn is_terminal_punctuation(self) -> bool;
119    fn is_variation_selector(self) -> bool;
120    fn is_whitespace(self) -> bool;
121
122    // numeric
123    fn numeric_type(self) -> Option<NumericType>;
124    fn numeric_value(self) -> Option<Number>;
125
126    // identifiers and syntax
127    fn is_id_continue(self) -> bool;
128    fn is_id_continue_nfkc(self) -> bool;
129    fn is_id_continue_other(self) -> bool;
130    fn is_id_start(self) -> bool;
131    fn is_id_start_nfkc(self) -> bool;
132    fn is_id_start_other(self) -> bool;
133    fn is_pattern_syntax(self) -> bool;
134    fn is_pattern_whitespace(self) -> bool;
135
136    // scripts
137    fn east_asian_width(self) -> EastAsianWidth;
138    fn hangul_syllable_type(self) -> Option<HangulSyllableType>;
139    fn jamo_short_name(self) -> Option<&'static str>;
140    fn indic_positional_category(self) -> Option<IndicPositionalCategory>;
141    fn indic_syllabic_category(self) -> IndicSyllabicCategory;
142    fn is_ideograph(self) -> bool;
143    fn is_ideograph_description_sequence_binary_operator(self) -> bool;
144    fn is_ideograph_description_sequence_radical(self) -> bool;
145    fn is_ideograph_description_sequence_trinary_operator(self) -> bool;
146    fn is_ideograph_unified(self) -> bool;
147    fn join_control(self) -> bool;
148    fn joining_group(self) -> JoiningGroup;
149    fn joining_type(self) -> JoiningType;
150    fn script(self) -> Option<Script>;
151    fn script_extensions(self) -> Option<&'static [Script]>;
152
153    // bidirectionality
154    fn bidi_class(self) -> BidiClass;
155    fn bidi_is_control(self) -> bool;
156    fn bidi_is_mirrored(self) -> bool;
157    fn bidi_mirror(self) -> Option<char>;
158    fn bidi_paired_bracket(self) -> char;
159    fn bidi_paired_bracket_type(self) -> Option<BidiPairedBracketType>;
160
161    // case
162    fn casefold(self) -> CharIter;
163    fn casefold_nfkc(self) -> CharIter;
164    fn casefold_nfkc_closure(self) -> CharIter;
165    fn casefold_simple(self) -> char;
166    fn changes_when_casefolded(self) -> bool;
167    fn changes_when_casefolded_nfkc(self) -> bool;
168    fn changes_when_casemapped(self) -> bool;
169    fn changes_when_lowercased(self) -> bool;
170    fn changes_when_titlecased(self) -> bool;
171    fn changes_when_uppercased(self) -> bool;
172    fn is_case_ignorable(self) -> bool;
173    fn is_cased(self) -> bool;
174    fn is_lowercase(self) -> bool;
175    fn is_lowercase_other(self) -> bool;
176    fn is_uppercase(self) -> bool;
177    fn is_uppercase_other (self) -> bool;
178    fn lowercase(self) -> CharIter;
179    fn lowercase_simple(self) -> char;
180    fn titlecase(self) -> CharIter;
181    fn titlecase_simple(self) -> char;
182    fn uppercase(self) -> CharIter;
183    fn uppercase_simple(self) -> char;
184
185    // normalisation
186    fn canonical_combining_class(self) -> u8;
187    fn decomposition_map(self) -> CharIter;
188    fn decomposition_type(self) -> Option<DecompositionType>;
189    fn excluded_from_composition(self) -> bool;
190    fn excluded_from_composition_fully(self) -> bool;
191    fn expands_on_nfc(self) -> bool;
192    fn expands_on_nfd(self) -> bool;
193    fn expands_on_nfkc(self) -> bool;
194    fn expands_on_nfkd(self) -> bool;
195    fn quick_check_nfc(self) -> Trilean;
196    fn quick_check_nfd(self) -> bool;
197    fn quick_check_nfkc(self) -> Trilean;
198    fn quick_check_nfkd(self) -> bool;
199
200    // segmentation
201    fn grapheme_cluster_break(self) -> GraphemeClusterBreak;
202    fn is_grapheme_base(self) -> bool;
203    fn is_grapheme_extend(self) -> bool;
204    fn is_grapheme_extend_other(self) -> bool;
205    fn is_grapheme_link(self) -> bool;
206    fn linebreak_class(self) -> Option<LinebreakClass>;
207    fn sentence_break(self) -> SentenceBreak;
208    fn word_break(self) -> WordBreak;
209
210    // account for inbuilt char methods, which seem to be for unicode 8.0
211    fn is_alpha(self) -> bool { Codepoint::is_alphabetic(self) }
212    fn is_lower(self) -> bool { Codepoint::is_lowercase(self) }
213    fn is_upper(self) -> bool { Codepoint::is_uppercase(self) }
214    fn is_white(self) -> bool { Codepoint::is_whitespace(self) }
215}
216
217impl Codepoint for char {
218    // general
219    fn age(self) -> Option<(u8,u8)> {
220        tables::UCD_AGE.search(self)
221    }
222
223    fn block(self) -> Option<UnicodeBlock> {
224        tables::UCD_BLOCK.search(self)
225    }
226
227    fn category(self) -> UnicodeCategory {
228        tables::UCD_CAT.search(self)
229            .unwrap_or(UnicodeCategory::Unassigned)
230    }
231
232    fn codepoint(self) -> char {
233        self
234    }
235
236    fn iso_comment(self) -> &'static str {
237        ""
238    }
239
240
241
242
243    // function and appearance
244    fn is_alphabetic(self) -> bool {
245        tables::UCD_ALPHA.includes(self)
246    }
247
248    fn is_alphabetic_other(self) -> bool {
249        tables::UCD_ALPHA_OTHER.includes(self)
250    }
251
252    fn is_dash(self) -> bool {
253        tables::UCD_DASH.includes(self)
254    }
255
256    fn is_default_ignorable(self) -> bool {
257        tables::UCD_DEFAULT_IGNORABLE.includes(self)
258    }
259
260    fn is_default_ignorable_other(self) -> bool {
261        tables::UCD_DEFAULT_IGNORABLE_OTHER.includes(self)
262    }
263
264    fn is_deprecated(self) -> bool {
265        match self as u32 {
266            329 | 1651 | 3959 | 3961 | 6051 | 6052 |
267                  8298...8303 | 9001 | 9002 | 917505 => true,
268            _ => false
269        }
270    }
271    fn is_diacritic(self) -> bool {
272        tables::UCD_DIACRITIC.includes(self)
273    }
274
275    fn is_extender(self) -> bool {
276        tables::UCD_EXTENDER.includes(self)
277    }
278
279    fn is_hex_digit(self) -> bool {
280        tables::UCD_HEX_DIGIT.includes(self)
281    }
282
283    fn is_hex_digit_ascii(self) -> bool {
284        tables::UCD_HEX_DIGIT_ASCII.includes(self)
285    }
286
287    fn is_hyphen(self) -> bool {
288        tables::UCD_HYPHEN.includes(self)
289    }
290
291    fn is_logical_order_exception(self) -> bool {
292        tables::UCD_LOGICAL_ORDER_EXCEPTION.includes(self)
293    }
294
295    fn is_math(self) -> bool {
296        tables::UCD_MATH.includes(self)
297    }
298
299    fn is_math_other(self) -> bool {
300        tables::UCD_MATH_OTHER.includes(self)
301    }
302
303    fn is_noncharacter(self) -> bool {
304        let cp = self as u32;
305        (cp >= 0xfdd0 && cp <= 0xfdef)
306            || ((cp & 0xffff) >= 0xfffe)
307    }
308
309    fn is_preprended_concatenation_mark(self) -> bool {
310        tables::UCD_PREPENDED_CONCATENATION_MARK.includes(self)
311    }
312
313    fn is_quotation_mark(self) -> bool {
314        tables::UCD_QUOT.includes(self)
315    }
316
317    fn is_sentence_terminal(self) -> bool {
318        tables::UCD_TERM_SENTENCE.includes(self)
319    }
320
321    fn is_soft_dotted(self) -> bool {
322        tables::UCD_SOFT_DOTTED.includes(self)
323    }
324
325    fn is_terminal_punctuation(self) -> bool {
326        tables::UCD_TERM_PUNC.includes(self)
327    }
328
329    fn is_variation_selector(self) -> bool {
330        let cp = self as u32;
331        (cp >= 917760 && cp <= 917999)
332            || (cp >= 65024 && cp <= 65039)
333            || (cp >= 6155 && cp <= 6157)
334    }
335
336    fn is_whitespace(self) -> bool {
337        tables::UCD_WHITE.includes(self)
338    }
339
340
341
342
343    // numeric
344    fn numeric_type(self) -> Option<NumericType> {
345        tables::UCD_NUMTYPE.search(self)
346    }
347
348    fn numeric_value(self) -> Option<Number> {
349        tables::UCD_NUMVAL.search(self).map(|i| {
350            match tables::UCD_NUMS[i as usize] {
351                (num, 1) => Number::Integer(num),
352                (num, den) => Number::Rational(num as i32, den as u32)
353            }
354        })
355    }
356
357
358
359    // identifiers and syntax
360    fn is_id_continue(self) -> bool {
361        tables::UCD_ID_CONT.includes(self)
362    }
363
364    fn is_id_continue_nfkc(self) -> bool {
365        tables::UCD_ID_CONT_NFKC.includes(self)
366    }
367
368    fn is_id_continue_other(self) -> bool {
369         match self as u32 {
370            183 | 903 | 4969...4977 | 6618 => true,
371            _ => false
372        }
373    }
374
375    fn is_id_start(self) -> bool {
376        tables::UCD_ID_START.includes(self)
377    }
378
379    fn is_id_start_nfkc(self) -> bool {
380        tables::UCD_ID_START_NFKC.includes(self)
381    }
382
383    fn is_id_start_other(self) -> bool {
384        match self as u32 {
385            6277 | 6278 | 8472 | 8494| 12443 | 12444 => true,
386            _ => false
387        }
388    }
389
390    fn is_pattern_syntax(self) -> bool {
391        tables::UCD_PATT_SYNTAX.includes(self)
392    }
393
394    fn is_pattern_whitespace(self) -> bool {
395        match self as u32 {
396            9...13 | 32 | 133 | 8206
397                   | 8207 | 8232 | 8233 => true,
398            _ => false
399        }
400    }
401
402
403
404
405    // scripts
406    fn east_asian_width(self) -> EastAsianWidth {
407        tables::UCD_EAWIDTH.search(self)
408            .unwrap_or(EastAsianWidth::Neutral)
409    }
410
411    fn hangul_syllable_type(self) -> Option<HangulSyllableType> {
412        let cp = self as u32;
413        match cp {
414            4352...4447 | 43360...43388 => Some(HangulSyllableType::LeadingJamo),
415            4448...4519 | 55216...55238 => Some(HangulSyllableType::VowelJamo),
416            4520...4607 | 55243...55291 => Some(HangulSyllableType::TrailingJamo),
417            44032...55203 => Some({
418                if cp % 28 == 16 { HangulSyllableType::LVSyllable }
419                else { HangulSyllableType::LVTSyllable }
420            }),
421            _ => None
422        }
423    }
424
425    fn jamo_short_name(self) -> Option<&'static str> {
426        tables::UCD_JSN.search(self)
427    }
428
429    fn indic_positional_category(self) -> Option<IndicPositionalCategory> {
430        tables::UCD_INPC.search(self)
431    }
432
433    fn indic_syllabic_category(self) -> IndicSyllabicCategory {
434        tables::UCD_INSC.search(self)
435            .unwrap_or(IndicSyllabicCategory::Other)
436    }
437
438    fn is_ideograph(self) -> bool {
439        tables::UCD_IDEO.includes(self)
440    }
441
442    fn is_ideograph_description_sequence_binary_operator(self) -> bool {
443        match self as u32 {
444            12272 | 12273 | 12276...12283 => true,
445            _ => false
446        }
447    }
448
449    fn is_ideograph_description_sequence_radical(self) -> bool {
450        match self as u32 {
451            11904...11929 | 11931...12019 | 12032...12245 => true,
452            _ => false
453        }
454    }
455
456    fn is_ideograph_description_sequence_trinary_operator(self) -> bool {
457        let cp = self as u32;
458        cp == 12274 || cp == 12275
459    }
460
461    fn is_ideograph_unified(self) -> bool {
462        tables::UCD_IDEO_UNIFIED.includes(self)
463    }
464
465    fn join_control(self) -> bool {
466        let cp = self as u32;
467        cp == 8204 || cp == 8205
468    }
469
470    fn joining_group(self) -> JoiningGroup {
471        tables::UCD_JOINGRP.search(self)
472            .unwrap_or(JoiningGroup::NoJoiningGroup)
473    }
474
475    fn joining_type(self) -> JoiningType {
476        tables::UCD_JOINTYPE.search(self)
477            .unwrap_or(JoiningType::NonJoining)
478    }
479
480    fn script(self) -> Option<Script> {
481        tables::UCD_SCRIPT.search(self)
482    }
483
484    fn script_extensions(self) -> Option<&'static [Script]> {
485        match tables::UCD_SCRIPTEXT.search(self) {
486            None => self.script().map(
487                |s| tables::UCD_SCRIPT_MAP[s as usize]),
488            x => x
489        }
490    }
491
492
493
494
495    // bidirectionality
496    fn bidi_class(self) -> BidiClass {
497        tables::UCD_BIDI_CLASS.search(self)
498            .unwrap_or(BidiClass::LeftToRight)
499    }
500
501    fn bidi_is_control(self) -> bool {
502        match self as u32 {
503            1564 | 8206 | 8207 | 8234...8238 | 8294...8297 => true,
504            _ => false
505        }
506    }
507
508    fn bidi_is_mirrored(self) -> bool {
509        tables::UCD_BIDI_MIRRORED.includes(self)
510    }
511
512    fn bidi_mirror(self) -> Option<char> {
513        tables::UCD_BIDI_MIRROR.search(self)
514    }
515
516    fn bidi_paired_bracket(self) -> char {
517        tables::UCD_BIDI_PAIRED.search(self)
518            .unwrap_or(self)
519    }
520
521    fn bidi_paired_bracket_type(self) -> Option<BidiPairedBracketType> {
522        tables::UCD_BIDI_BRATYPE.search(self)
523    }
524
525
526
527
528    // case
529    fn casefold(self) -> CharIter {
530        CharIter::new(tables::UCD_CASE_FD.search(self), self)
531    }
532
533    fn casefold_nfkc(self) -> CharIter {
534        CharIter::new(tables::UCD_CASE_FD_NFKC.search(self), self)
535    }
536
537    fn casefold_nfkc_closure(self) -> CharIter {
538        CharIter::new(tables::UCD_CASE_FD_CLOS.search(self), self)
539    }
540
541    fn casefold_simple(self) -> char {
542        tables::UCD_CASE_SIMP_FD.search(self)
543            .map(cp_decode)
544            .unwrap_or(self.codepoint())
545    }
546
547    fn changes_when_casefolded(self) -> bool {
548        tables::UCD_CASE_CHANGES_CASEFOLD.includes(self)
549    }
550
551    fn changes_when_casefolded_nfkc(self) -> bool {
552        tables::UCD_CASE_CHANGES_CASEFOLD_NFKC.includes(self)
553    }
554
555    fn changes_when_casemapped(self) -> bool {
556        tables::UCD_CASE_CHANGES_CASEMAP.includes(self)
557    }
558
559    fn changes_when_lowercased(self) -> bool {
560        tables::UCD_CASE_CHANGES_LOWER.includes(self)
561    }
562
563    fn changes_when_titlecased(self) -> bool {
564        tables::UCD_CASE_CHANGES_TITLE.includes(self)
565    }
566
567    fn changes_when_uppercased(self) -> bool {
568        tables::UCD_CASE_CHANGES_UPPER.includes(self)
569    }
570
571    fn is_case_ignorable(self) -> bool {
572        tables::UCD_CASE_IGNORABLE.includes(self)
573    }
574
575    fn is_cased(self) -> bool {
576        tables::UCD_CASED.includes(self)
577    }
578
579    fn is_lowercase(self) -> bool {
580        tables::UCD_CASE_IS_LOWER.includes(self)
581    }
582
583    fn is_lowercase_other(self) -> bool {
584        tables::UCD_CASE_IS_LOWER_OTHER.includes(self)
585    }
586
587    fn is_uppercase(self) -> bool {
588        tables::UCD_CASE_IS_UPPER.includes(self)
589    }
590
591    fn is_uppercase_other (self) -> bool {
592        match self as u32 {
593            8544...8559 | 9398...9423 | 127280...127305
594                        | 127312...127337 | 127344...127369 => true,
595            _ => false
596        }
597    }
598
599    fn lowercase(self) -> CharIter {
600        CharIter::new(tables::UCD_CASE_LW.search(self), self)
601    }
602
603    fn lowercase_simple(self) -> char {
604        tables::UCD_CASE_SIMP_LW.search(self)
605            .map(cp_decode)
606            .unwrap_or(self.codepoint())
607    }
608
609    fn titlecase(self) -> CharIter {
610        CharIter::new(tables::UCD_CASE_TI.search(self), self)
611    }
612
613    fn titlecase_simple(self) -> char {
614        tables::UCD_CASE_SIMP_TI.search(self)
615            .map(cp_decode)
616            .unwrap_or(self.codepoint())
617    }
618
619    fn uppercase(self) -> CharIter {
620        CharIter::new(tables::UCD_CASE_UP.search(self), self)
621    }
622
623    fn uppercase_simple(self) -> char {
624        tables::UCD_CASE_SIMP_UP.search(self)
625            .map(cp_decode)
626            .unwrap_or(self.codepoint())
627    }
628
629
630
631
632    // normalisation
633    fn canonical_combining_class(self) -> u8 {
634        tables::UCD_COMBCLS.search(self)
635            .unwrap_or(0)
636    }
637
638    fn decomposition_map(self) -> CharIter {
639        // manually handle arithmetic decomposition mapping
640        // for hangul syllables, cutting out 11172 entries
641        // from the data table; implementation is directly
642        // from the unicode standard, chapter 3.12
643        const SBASE: u32 = 0xac00;
644        const LBASE: u32 = 0x1100;
645        const VBASE: u32 = 0x1161;
646        const TBASE: u32 = 0x11a7;
647        const VCOUNT: u32 = 21;
648        const TCOUNT: u32 = 28;
649        const NCOUNT: u32 = VCOUNT * TCOUNT;
650
651        match self.hangul_syllable_type() {
652            Some(HangulSyllableType::LVSyllable) => unsafe {
653                let sindex = (self as u32) - SBASE;
654                let lindex = sindex / NCOUNT;
655                let vindex = (sindex % NCOUNT) / TCOUNT;
656                CharIter::hangul(
657                    char::from_u32_unchecked(LBASE + lindex),
658                    char::from_u32_unchecked(VBASE + vindex))
659            },
660            Some(HangulSyllableType::LVTSyllable) => unsafe {
661                let sindex = (self as u32) - SBASE;
662                let tindex = sindex % TCOUNT;
663                let lvindex = sindex - tindex;
664                CharIter::hangul(
665                    char::from_u32_unchecked(SBASE + lvindex),
666                    char::from_u32_unchecked(TBASE + tindex))
667            },
668            _ =>
669                CharIter::new(tables::UCD_DECOMP_MAP.search(self), self)
670        }
671    }
672
673    fn decomposition_type(self) -> Option<DecompositionType> {
674        tables::UCD_DECOMP_TYPE.search(self)
675    }
676
677    fn excluded_from_composition(self) -> bool {
678        tables::UCD_COMP_EXCL.includes(self)
679    }
680
681    fn excluded_from_composition_fully(self) -> bool {
682        tables::UCD_COMP_EXCL_FULL.includes(self)
683    }
684
685    fn expands_on_nfc(self) -> bool {
686        tables::UCD_EXPANDING_NFC.includes(self)
687    }
688
689    fn expands_on_nfd(self) -> bool {
690        tables::UCD_EXPANDING_NFD.includes(self)
691    }
692
693    fn expands_on_nfkc(self) -> bool {
694        tables::UCD_EXPANDING_NFKC.includes(self)
695    }
696
697    fn expands_on_nfkd(self) -> bool {
698        tables::UCD_EXPANDING_NFKD.includes(self)
699    }
700
701    fn quick_check_nfc(self) -> Trilean {
702        tables::UCD_QNFC.search(self)
703            .unwrap_or(Trilean::True)
704    }
705
706    fn quick_check_nfd(self) -> bool {
707        !tables::UCD_QUICK_NFD.includes(self)
708    }
709
710    fn quick_check_nfkc(self) -> Trilean {
711        match tables::UCD_QNFKC.includes(self) {
712            true => Trilean::False,
713            false => self.quick_check_nfc()
714        }
715    }
716
717    fn quick_check_nfkd(self) -> bool {
718        !tables::UCD_QUICK_NFKD.includes(self)
719    }
720
721
722
723
724    // segmentation
725    fn grapheme_cluster_break(self) -> GraphemeClusterBreak {
726        let cx = self.clone();
727        match self.hangul_syllable_type() {
728            Some(HangulSyllableType::LeadingJamo)  => GraphemeClusterBreak::LeadingJamo,
729            Some(HangulSyllableType::VowelJamo)    => GraphemeClusterBreak::VowelJamo,
730            Some(HangulSyllableType::TrailingJamo) => GraphemeClusterBreak::TrailingJamo,
731            Some(HangulSyllableType::LVSyllable)   => GraphemeClusterBreak::LVHangulSyllable,
732            Some(HangulSyllableType::LVTSyllable)  => GraphemeClusterBreak::LVTHangulSyllable,
733            None => tables::UCD_GCB.search(cx)
734                        .unwrap_or(GraphemeClusterBreak::Other)
735        }
736    }
737
738    fn is_grapheme_base(self) -> bool {
739        tables::UCD_GRAPH_BASE.includes(self)
740    }
741
742    fn is_grapheme_extend(self) -> bool {
743        tables::UCD_GRAPH_EXT.includes(self)
744    }
745
746    fn is_grapheme_extend_other(self) -> bool {
747        tables::UCD_GRAPH_EXT_OTHER.includes(self)
748    }
749
750    fn is_grapheme_link(self) -> bool {
751        tables::UCD_GRAPH_LINK.includes(self)
752    }
753
754    fn linebreak_class(self) -> Option<LinebreakClass> {
755        tables::UCD_LB.search(self)
756    }
757
758    fn sentence_break(self) -> SentenceBreak {
759        tables::UCD_SBRK.search(self)
760            .unwrap_or(SentenceBreak::Other)
761    }
762
763    fn word_break(self) -> WordBreak {
764        tables::UCD_WBRK.search(self)
765            .unwrap_or(WordBreak::Other)
766    }
767}