ucd_util/
ideograph.rs

/// A set of ranges that corresponds to the set of all ideograph codepoints.
///
/// Every entry is a `(start, end)` pair in which both endpoints are
/// inclusive. The ranges are pairwise disjoint and each one is listed exactly
/// once. Note that the table is grouped by kind of ideograph rather than
/// sorted by codepoint, so it is not suitable for binary search as-is.
///
/// These ranges are defined in Unicode 4.8 Table 4-13.
pub const RANGE_IDEOGRAPH: &[(u32, u32)] = &[
    // CJK unified ideographs.
    (0x3400, 0x4DB5),
    (0x4E00, 0x9FD5),
    (0x20000, 0x2A6D6),
    (0x2A700, 0x2B734),
    (0x2B740, 0x2B81D),
    (0x2B820, 0x2CEA1),
    // Tangut ideographs.
    (0x17000, 0x187EC),
    // CJK compatibility ideographs.
    (0xF900, 0xFA6D),
    (0xFA70, 0xFAD9),
    (0x2F800, 0x2FA1D),
];
17
/// Derive the character name of an ideograph from its codepoint.
///
/// The name is a fixed prefix followed by the codepoint written in uppercase
/// hexadecimal, zero-padded to at least four digits. Only ideographic
/// codepoints have such a name; they are exactly the following inclusive
/// ranges, grouped by the prefix they produce:
///
/// `CJK UNIFIED IDEOGRAPH-`
///
/// * `3400..4DB5`
/// * `4E00..9FD5`
/// * `20000..2A6D6`
/// * `2A700..2B734`
/// * `2B740..2B81D`
/// * `2B820..2CEA1`
///
/// `TANGUT IDEOGRAPH-`
///
/// * `17000..187EC`
///
/// `CJK COMPATIBILITY IDEOGRAPH-`
///
/// * `F900..FA6D`
/// * `FA70..FAD9`
/// * `2F800..2FA1D`
///
/// For a codepoint outside all of these ranges, `None` is returned.
///
/// This implements the algorithm described in Unicode 4.8.
pub fn ideograph_name(cp: u32) -> Option<String> {
    // The ranges tested below should be in sync with the `RANGE_IDEOGRAPH`
    // constant.

    // Inclusive membership test of `cp` in `lo..=hi`.
    let within = |lo: u32, hi: u32| lo <= cp && cp <= hi;

    // The three groups are mutually disjoint, so the order in which they are
    // tested does not affect the result.
    if within(0x17000, 0x187EC) {
        return Some(format!("TANGUT IDEOGRAPH-{:04X}", cp));
    }

    let is_compatibility = within(0xF900, 0xFA6D)
        || within(0xFA70, 0xFAD9)
        || within(0x2F800, 0x2FA1D);
    if is_compatibility {
        return Some(format!("CJK COMPATIBILITY IDEOGRAPH-{:04X}", cp));
    }

    let is_unified = within(0x3400, 0x4DB5)
        || within(0x4E00, 0x9FD5)
        || within(0x20000, 0x2A6D6)
        || within(0x2A700, 0x2B734)
        || within(0x2B740, 0x2B81D)
        || within(0x2B820, 0x2CEA1);
    if is_unified {
        Some(format!("CJK UNIFIED IDEOGRAPH-{:04X}", cp))
    } else {
        None
    }
}
56
#[cfg(test)]
mod tests {
    use super::ideograph_name;

    /// Codepoints inside a supported range produce the expected derived name.
    #[test]
    fn name() {
        // (codepoint, expected name) pairs covering each of the three name
        // prefixes.
        let cases: [(u32, &str); 4] = [
            (0x4E00, "CJK UNIFIED IDEOGRAPH-4E00"),
            (0x9FD5, "CJK UNIFIED IDEOGRAPH-9FD5"),
            (0x17000, "TANGUT IDEOGRAPH-17000"),
            (0xF900, "CJK COMPATIBILITY IDEOGRAPH-F900"),
        ];
        for &(cp, expected) in cases.iter() {
            assert_eq!(ideograph_name(cp).unwrap(), expected);
        }
    }

    /// A codepoint outside every ideograph range has no derived name.
    #[test]
    fn invalid() {
        assert_eq!(ideograph_name(0), None);
    }
}