1#![deny(warnings, missing_docs, missing_debug_implementations)]
2use runestr::rune;
14
15#[allow(dead_code)]
16mod tables;
17
/// Region whose variation selector is attached to Han characters.
///
/// Each variant is paired with exactly one variation selector in
/// `PAN_CJKV_REGION_DATA`. The variant's discriminant is used as the index
/// into that table, so the declaration order here must stay in sync with the
/// order of the table entries.
///
/// NOTE(review): the variant names look like ISO 3166-1 alpha-2 codes; `XK`
/// is not an assigned ISO code and presumably denotes the Kangxi Dictionary
/// forms. Confirm against the PanCJKV IVD collection.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum PanCJKVRegion {
    /// The `XK` region, annotated with U+E01EF.
    XK,
    /// The `CN` region, annotated with U+E01EE.
    CN,
    /// The `SG` region, annotated with U+E01ED.
    SG,
    /// The `TW` region, annotated with U+E01EC.
    TW,
    /// The `HK` region, annotated with U+E01EB.
    HK,
    /// The `MO` region, annotated with U+E01EA.
    MO,
    /// The `MY` region, annotated with U+E01E9.
    MY,
    /// The `JP` region, annotated with U+E01E8.
    JP,
    /// The `KR` region, annotated with U+E01E7.
    KR,
    /// The `KP` region, annotated with U+E01E6.
    KP,
    /// The `VN` region, annotated with U+E01E5.
    VN,
}
44
/// Variation selector assigned to each [`PanCJKVRegion`].
///
/// `annotate_with_pan_cjkv_region` looks an entry up with
/// `PAN_CJKV_REGION_DATA[region as usize]`, so entry N must describe the
/// variant whose discriminant is N; keep this table in the same order as the
/// enum declaration. The selectors descend from U+E01EF (`XK`) to U+E01E5
/// (`VN`).
const PAN_CJKV_REGION_DATA: &[(PanCJKVRegion, char)] = &[
    (PanCJKVRegion::XK, '\u{E01EF}'),
    (PanCJKVRegion::CN, '\u{E01EE}'),
    (PanCJKVRegion::SG, '\u{E01ED}'),
    (PanCJKVRegion::TW, '\u{E01EC}'),
    (PanCJKVRegion::HK, '\u{E01EB}'),
    (PanCJKVRegion::MO, '\u{E01EA}'),
    (PanCJKVRegion::MY, '\u{E01E9}'),
    (PanCJKVRegion::JP, '\u{E01E8}'),
    (PanCJKVRegion::KR, '\u{E01E7}'),
    (PanCJKVRegion::KP, '\u{E01E6}'),
    (PanCJKVRegion::VN, '\u{E01E5}'),
];
58
/// Number of entries in `PAN_CJKV_REGION_DATA`.
// Nothing visible in this file reads this constant; the `allow` keeps the
// resulting `dead_code` warning from becoming an error under the crate-level
// `#![deny(warnings)]`.
#[allow(dead_code)]
const PAN_CJKV_REGION_COUNT: usize = PAN_CJKV_REGION_DATA.len();
61
/// Extension trait that adds PanCJKV region annotation to a value.
///
/// This file provides a blanket implementation for every
/// `Iterator<Item = rune>`, so the method is available on any iterator of
/// runes once the trait is in scope.
pub trait PanCJKVAnnotate: Sized {
    /// Consumes `self` and wraps it in a [`PanCJKVAnnotateIter`] configured
    /// for `region`.
    ///
    /// The returned iterator attaches the variation selector associated with
    /// `region` to Han characters that do not already carry a variation
    /// selector, and passes every other item through unchanged.
    fn annotate_with_pan_cjkv_region(self, region: PanCJKVRegion) -> PanCJKVAnnotateIter<Self>;
}
68
69impl<I> PanCJKVAnnotate for I
70where
71 I: Iterator<Item = rune>,
72{
73 fn annotate_with_pan_cjkv_region(self, region: PanCJKVRegion) -> PanCJKVAnnotateIter<Self> {
74 PanCJKVAnnotateIter {
75 runes: self,
76 region_vs: PAN_CJKV_REGION_DATA[region as usize].1,
77 }
78 }
79}
80
/// Iterator adapter that attaches a region-specific variation selector to Han
/// characters.
///
/// Created by `PanCJKVAnnotate::annotate_with_pan_cjkv_region`. The adapter is
/// `Clone` whenever the wrapped iterator is, like the standard library's
/// iterator adapters.
#[derive(Clone, Debug)]
pub struct PanCJKVAnnotateIter<I> {
    /// Wrapped iterator that supplies the items to annotate.
    runes: I,
    /// Variation selector inserted after a Han core that has none yet.
    region_vs: char,
}
88
89impl<I> Iterator for PanCJKVAnnotateIter<I>
90where
91 I: Iterator<Item = rune>,
92{
93 type Item = rune;
94
95 fn next(&mut self) -> Option<Self::Item> {
96 use crate::tables::is_han_script_lo_character;
97 let rune = self.runes.next()?;
98 if let Some(ch) = rune.into_char() {
99 if is_han_script_lo_character(ch) {
100 let mut s = String::new();
101 s.push(ch);
102 s.push(self.region_vs);
103 return Some(rune::from_grapheme_cluster(&s).unwrap());
104 } else {
105 return Some(rune);
106 }
107 } else {
108 let chars = rune.into_chars();
109 #[derive(Clone, Copy)]
110 enum State {
111 None,
112 HanScriptLoCore(usize),
113 HanScriptLoCoreAndVS(usize, usize),
114 }
115
116 let mut state = State::None;
117 for (idx, ch) in chars.clone().enumerate() {
118 match state {
119 State::None => {
120 if is_han_script_lo_character(ch) {
121 state = State::HanScriptLoCore(idx);
122 }
123 }
124 State::HanScriptLoCore(core_idx) => {
125 if idx == core_idx + 1 && is_vs(ch) {
126 state = State::HanScriptLoCoreAndVS(core_idx, idx);
127 }
128 break;
129 }
130 _ => unreachable!(),
131 }
132 }
133 if let State::HanScriptLoCore(idx) = state {
134 let mut str = String::new();
135 str.extend(chars.clone().take(idx + 1));
136 str.push(self.region_vs);
137 str.extend(chars.skip(idx + 1));
138 Some(rune::from_grapheme_cluster(&str).unwrap())
139 } else {
140 Some(rune)
141 }
142 }
143 }
144}
145
/// Returns `true` when `ch` lies in one of the two variation selector ranges
/// this file recognises: U+FE00..=U+FE0F or U+E0100..=U+E01EF.
///
/// Code points outside those two ranges are always reported as `false`.
fn is_vs(ch: char) -> bool {
    matches!(ch, '\u{FE00}'..='\u{FE0F}' | '\u{E0100}'..='\u{E01EF}')
}
156
#[cfg(test)]
mod tests {
    use runestr::RuneString;

    use crate::{PanCJKVAnnotate, PanCJKVRegion};

    /// Two Han clusters with combining marks: the first has no variation
    /// selector and must receive the `XK` one right after its core; the second
    /// already carries U+E01EE and must be left as it is.
    #[test]
    fn test_han_with_ascent() {
        let input = RuneString::from_str_lossy("\u{6211}\u{030C}\u{4EEC}\u{E01EE}\u{0301}");
        assert_eq!(2, input.runes().count());

        let annotated: RuneString = input
            .runes()
            .annotate_with_pan_cjkv_region(PanCJKVRegion::XK)
            .collect();

        let expected = [
            '\u{6211}',
            '\u{E01EF}',
            '\u{030C}',
            '\u{4EEC}',
            '\u{E01EE}',
            '\u{0301}',
        ];
        let actual: Vec<char> = annotated.chars().collect();
        assert_eq!(&actual[..], &expected[..]);

        // Annotation must not split or merge grapheme clusters.
        assert_eq!(2, annotated.runes().count());
    }
}