ucd_util/
hangul.rs

// This implementation should correspond to the algorithms described in
// Section 3.12 of the Unicode Standard.
3
/// The inclusive codepoint ranges that together cover every precomposed
/// Hangul syllable.
///
/// These ranges are defined in Unicode 4.8 Table 4-13.
pub const RANGE_HANGUL_SYLLABLE: &[(u32, u32)] = &[(0xAC00, 0xD7A3)];

// Parameters of the arithmetic Hangul decomposition (Unicode 3.12).

/// Codepoint of the first precomposed Hangul syllable.
const S_BASE: u32 = 0xAC00;
/// Codepoint that a leading consonant index of zero maps to.
const L_BASE: u32 = 0x1100;
/// Codepoint that a vowel index of zero maps to.
const V_BASE: u32 = 0x1161;
/// Base added to a non-zero trailing consonant index. A trailing index of
/// zero means "no trailing consonant", so this base itself is never produced.
const T_BASE: u32 = 0x11A7;
/// Number of trailing consonant indices, counting the "none" slot.
const T_COUNT: u32 = 28;
/// Number of precomposed syllables sharing one leading consonant: 21 vowel
/// indices times `T_COUNT` trailing indices (= 588).
const N_COUNT: u32 = 21 * T_COUNT;
16
17/// Return the character name of the given precomposed Hangul codepoint.
18///
19/// If the given codepoint does not correspond to a precomposed Hangul
20/// codepoint in the inclusive range `AC00..D7A3`, then this returns `None`.
21///
22/// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
23///
24/// The `table` given should be a map from codepoint to the corresponding
25/// Jamo short name for that codepoint. If you're using `ucd-generate`, then
26/// the table can be generated via the `jamo-short-name` sub-command.
27pub fn hangul_name<'a>(
28    table: &'a [(u32, &'a str)],
29    cp: u32,
30) -> Option<String> {
31    let mut name = "HANGUL SYLLABLE ".to_string();
32    let (lpart, vpart, tpart) = match hangul_full_canonical_decomposition(cp) {
33        None => return None,
34        Some(triple) => triple,
35    };
36
37    name.push_str(jamo_short_name(table, lpart));
38    name.push_str(jamo_short_name(table, vpart));
39    name.push_str(tpart.map_or("", |cp| jamo_short_name(table, cp)));
40    Some(name)
41}
42
43/// Return the full canonical decomposition of the given precomposed Hangul
44/// codepoint.
45///
46/// If the decomposition does not have any trailing consonant, then the third
47/// part of the tuple returned is `None`.
48///
49/// If the given codepoint does not correspond to a precomposed Hangul
50/// codepoint in the inclusive range `AC00..D7A3`, then this returns `None`.
51///
52/// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
53pub fn hangul_full_canonical_decomposition(
54    cp: u32,
55) -> Option<(u32, u32, Option<u32>)> {
56    if !(0xAC00 <= cp && cp <= 0xD7A3) {
57        return None;
58    }
59
60    let s_index = cp - S_BASE;
61    let l_index = s_index / N_COUNT;
62    let v_index = (s_index % N_COUNT) / T_COUNT;
63    let t_index = s_index % T_COUNT;
64
65    let l_part = L_BASE + l_index;
66    let v_part = V_BASE + v_index;
67    let t_part = if t_index == 0 { None } else { Some(T_BASE + t_index) };
68    Some((l_part, v_part, t_part))
69}
70
/// A table mapping Jamo codepoints to their Jamo short names.
///
/// Entries must be sorted in ascending codepoint order, because lookups use
/// binary search.
type JamoShortName<'a> = &'a [(u32, &'a str)];

/// Look up the Jamo short name recorded for `cp` in `table`.
///
/// # Panics
///
/// Panics if `table` has no entry for `cp`. The table is supplied by the
/// caller, so an incomplete or unsorted table is a caller bug; the panic
/// message names the codepoint that could not be found.
fn jamo_short_name<'a>(table: JamoShortName<'a>, cp: u32) -> &'a str {
    let i = table
        .binary_search_by_key(&cp, |&(key, _)| key)
        .unwrap_or_else(|_| {
            panic!(
                "no Jamo short name for codepoint U+{:04X}; \
                 is the table complete and sorted by codepoint?",
                cp
            )
        });
    table[i].1
}
77
#[cfg(test)]
mod tests {
    use crate::unicode_tables::jamo_short_name::JAMO_SHORT_NAME as TABLE;

    use super::{hangul_full_canonical_decomposition, hangul_name};

    // A syllable with a trailing consonant decomposes into three Jamo.
    #[test]
    fn canon_decomp() {
        assert_eq!(
            hangul_full_canonical_decomposition(0xD4DB),
            Some((0x1111, 0x1171, Some(0x11B6)))
        );
    }

    // The first syllable has no trailing consonant, so the third part of
    // the decomposition must be `None`.
    #[test]
    fn canon_decomp_no_trailing() {
        assert_eq!(
            hangul_full_canonical_decomposition(0xAC00),
            Some((0x1100, 0x1161, None))
        );
    }

    // The last syllable in the range uses the largest index in every
    // position; the codepoints on either side of the range are rejected.
    #[test]
    fn canon_decomp_bounds() {
        assert_eq!(
            hangul_full_canonical_decomposition(0xD7A3),
            Some((0x1112, 0x1175, Some(0x11C2)))
        );
        assert_eq!(hangul_full_canonical_decomposition(0xABFF), None);
        assert_eq!(hangul_full_canonical_decomposition(0xD7A4), None);
    }

    #[test]
    fn name() {
        assert_eq!(
            hangul_name(TABLE, 0xD4DB).unwrap(),
            "HANGUL SYLLABLE PWILH"
        );
    }

    // Names at both ends of the precomposed range.
    #[test]
    fn name_bounds() {
        assert_eq!(hangul_name(TABLE, 0xAC00).unwrap(), "HANGUL SYLLABLE GA");
        assert_eq!(hangul_name(TABLE, 0xD7A3).unwrap(), "HANGUL SYLLABLE HIH");
    }

    // Every precomposed syllable must have a name; this also checks that
    // the table covers every Jamo the decomposition can produce.
    #[test]
    fn all() {
        for cp in 0xAC00..(0xD7A3 + 1) {
            hangul_name(TABLE, cp).unwrap();
        }
    }

    // Codepoints outside the precomposed range have no Hangul name,
    // including the ones immediately adjacent to the range.
    #[test]
    fn invalid() {
        assert!(hangul_name(TABLE, 0).is_none());
        assert!(hangul_name(TABLE, 0xABFF).is_none());
        assert!(hangul_name(TABLE, 0xD7A4).is_none());
        assert!(hangul_name(TABLE, u32::max_value()).is_none());
    }
}