1use serde::Deserialize;
2use std::collections::{HashMap, HashSet};
3use std::sync::LazyLock;
4
// Arithmetic parameters for Hangul syllables, grouped per component as
// base (`*0`), count (`*_COUNT`) and exclusive end (`*1`). The values are the
// SBase/LBase/VBase/TBase and LCount/VCount/TCount constants of the Unicode
// Hangul syllable algorithm.

/// First code point of the leading-component range.
const L0: u32 = 0x1100;
/// Number of leading components.
const L_COUNT: u32 = 19;
/// One past the last leading component.
const L1: u32 = L0 + L_COUNT;

/// First code point of the vowel-component range.
const V0: u32 = 0x1161;
/// Number of vowel components.
const V_COUNT: u32 = 21;
/// One past the last vowel component.
const V1: u32 = V0 + V_COUNT;

/// Base of the trailing-component range; `T0` itself stands for "no trailing component".
const T0: u32 = 0x11A7;
/// Number of trailing slots, including the empty one at index 0.
const T_COUNT: u32 = 28;
/// One past the last trailing component.
const T1: u32 = T0 + T_COUNT;

/// Number of syllables that share one leading component.
const N_COUNT: u32 = V_COUNT * T_COUNT;

/// First precomposed syllable.
const S0: u32 = 0xAC00;
/// Total number of precomposed syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;
/// One past the last precomposed syllable.
const S1: u32 = S0 + S_COUNT;
18
19#[derive(Deserialize)]
20struct RawNf {
21 ranks: Vec<Vec<u32>>,
22 exclusions: Vec<u32>,
23 decomp: Vec<(u32, Vec<u32>)>,
24}
25
/// Lookup tables used by decomposition and composition.
struct NfData {
    /// Code point -> rank already shifted into the top byte, ready to be OR-ed onto
    /// the code point.
    shifted_rank: HashMap<u32, u32>,
    /// Code point -> its decomposition, stored in reverse so that it can be appended
    /// to a stack and popped back in forward order.
    decomp: HashMap<u32, Vec<u32>>,
    /// First code point -> (second code point -> composite) for recomposable pairs.
    recomp: HashMap<u32, HashMap<u32, u32>>,
}
31
32static NF: LazyLock<NfData> = LazyLock::new(|| {
33 let raw: RawNf = serde_json::from_str(include_str!("../data/nf.json")).expect("valid nf.json");
34 let mut shifted_rank = HashMap::new();
35 for (i, cps) in raw.ranks.iter().enumerate() {
36 let rank = ((i as u32) + 1) << 24;
37 for &cp in cps {
38 shifted_rank.insert(cp, rank);
39 }
40 }
41
42 let exclusions: HashSet<u32> = raw.exclusions.into_iter().collect();
43 let mut decomp = HashMap::new();
44 let mut recomp: HashMap<u32, HashMap<u32, u32>> = HashMap::new();
45 for (cp, mut cps) in raw.decomp {
46 if !exclusions.contains(&cp) && cps.len() == 2 {
47 recomp.entry(cps[0]).or_default().insert(cps[1], cp);
48 }
49 cps.reverse();
50 decomp.insert(cp, cps);
51 }
52
53 NfData {
54 shifted_rank,
55 decomp,
56 recomp,
57 }
58});
59
/// Returns the rank stored in the top byte of a packed value (0 when none was attached).
fn unpack_cc(packed: u32) -> u32 {
    // Shifting a `u32` right by 24 leaves only its top eight bits, so no mask is needed.
    packed >> 24
}
63
/// Returns the code point stored in the low 24 bits of a packed value.
fn unpack_cp(packed: u32) -> u32 {
    const CP_MASK: u32 = (1 << 24) - 1;
    packed & CP_MASK
}
67
68fn is_hangul(cp: u32) -> bool {
69 (S0..S1).contains(&cp)
70}
71
72fn compose_pair(a: u32, b: u32) -> Option<u32> {
73 if (L0..L1).contains(&a) && (V0..V1).contains(&b) {
74 Some(S0 + (a - L0) * N_COUNT + (b - V0) * T_COUNT)
75 } else if is_hangul(a) && b > T0 && b < T1 && (a - S0).is_multiple_of(T_COUNT) {
76 Some(a + (b - T0))
77 } else {
78 NF.recomp.get(&a).and_then(|bucket| bucket.get(&b)).copied()
79 }
80}
81
82fn decomposed(cps: &[u32]) -> Vec<u32> {
83 let mut ret = Vec::new();
84 let mut buf = Vec::new();
85 let mut check_order = false;
86
87 let add = |ret: &mut Vec<u32>, check_order: &mut bool, cp: u32| {
88 if let Some(&cc) = NF.shifted_rank.get(&cp) {
89 *check_order = true;
90 ret.push(cp | cc);
91 } else {
92 ret.push(cp);
93 }
94 };
95
96 for &cp0 in cps {
97 let mut cp = cp0;
98 loop {
99 if cp < 0x80 {
100 ret.push(cp);
101 } else if is_hangul(cp) {
102 let s_index = cp - S0;
103 let l_index = s_index / N_COUNT;
104 let v_index = (s_index % N_COUNT) / T_COUNT;
105 let t_index = s_index % T_COUNT;
106 add(&mut ret, &mut check_order, L0 + l_index);
107 add(&mut ret, &mut check_order, V0 + v_index);
108 if t_index > 0 {
109 add(&mut ret, &mut check_order, T0 + t_index);
110 }
111 } else if let Some(mapped) = NF.decomp.get(&cp) {
112 buf.extend_from_slice(mapped);
113 } else {
114 add(&mut ret, &mut check_order, cp);
115 }
116
117 if let Some(next) = buf.pop() {
118 cp = next;
119 } else {
120 break;
121 }
122 }
123 }
124
125 if check_order && ret.len() > 1 {
126 let mut prev_cc = unpack_cc(ret[0]);
127 let mut i = 1;
128 while i < ret.len() {
129 let cc = unpack_cc(ret[i]);
130 if cc == 0 || prev_cc <= cc {
131 prev_cc = cc;
132 i += 1;
133 continue;
134 }
135 let mut j = i - 1;
136 loop {
137 ret.swap(j + 1, j);
138 if j == 0 {
139 break;
140 }
141 j -= 1;
142 prev_cc = unpack_cc(ret[j]);
143 if prev_cc <= cc {
144 break;
145 }
146 }
147 prev_cc = unpack_cc(ret[i]);
148 i += 1;
149 }
150 }
151
152 ret
153}
154
155fn composed_from_decomposed(v: &[u32]) -> Vec<u32> {
156 let mut ret = Vec::new();
157 let mut stack = Vec::new();
158 let mut prev_cp: Option<u32> = None;
159 let mut prev_cc = 0;
160
161 for &packed in v {
162 let cc = unpack_cc(packed);
163 let cp = unpack_cp(packed);
164 if let Some(prev) = prev_cp {
165 if prev_cc > 0 && prev_cc >= cc {
166 if cc == 0 {
167 ret.push(prev);
168 ret.append(&mut stack);
169 prev_cp = Some(cp);
170 } else {
171 stack.push(cp);
172 }
173 prev_cc = cc;
174 } else if let Some(composed) = compose_pair(prev, cp) {
175 prev_cp = Some(composed);
176 } else if prev_cc == 0 && cc == 0 {
177 ret.push(prev);
178 prev_cp = Some(cp);
179 } else {
180 stack.push(cp);
181 prev_cc = cc;
182 }
183 } else if cc == 0 {
184 prev_cp = Some(cp);
185 } else {
186 ret.push(cp);
187 }
188 }
189
190 if let Some(prev) = prev_cp {
191 ret.push(prev);
192 ret.append(&mut stack);
193 }
194
195 ret
196}
197
198pub fn nfd(cps: &[u32]) -> Vec<u32> {
199 decomposed(cps).into_iter().map(unpack_cp).collect()
200}
201
202pub fn nfc(cps: &[u32]) -> Vec<u32> {
203 composed_from_decomposed(&decomposed(cps))
204}