1use once_cell::sync::Lazy;
2use serde::Deserialize;
3use std::collections::HashMap;
4
/// The kind of relation that links a source ideograph to its destination ideograph.
///
/// Each variant corresponds to one relation tag found in the kVariants dictionary; the tag is
/// given in the variant's documentation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KVariantClass {
    /// Relation tag `"wrong!"`.
    Wrong,
    /// Relation tag `"sem"`.
    ///
    /// The spelling of this variant name is kept as-is because it is part of the public API.
    SementicVariant,
    /// Relation tag `"simp"`.
    Simplified,
    /// Relation tag `"old"`.
    Old,
    /// Relation tag `"="`.
    Equal,
}

/// One dictionary entry: a source ideograph, the ideograph it maps to, and how they relate.
///
/// The type is a small plain value (two `char`s and a unit-only enum), so it is `Copy` and
/// can be taken out of a lookup table by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct KVariant {
    /// The ideograph the entry is keyed by.
    pub source_ideograph: char,
    /// The relation between `source_ideograph` and `destination_ideograph`.
    pub classification: KVariantClass,
    /// The ideograph that `source_ideograph` maps to.
    pub destination_ideograph: char,
}
20
21#[derive(Deserialize)]
22pub struct TsvRow {
23 lhs: String,
24 relation: String,
25 rhs: String,
26}
27
28pub static KVARIANTS: Lazy<HashMap<char, KVariant>> = Lazy::new(|| {
29 let dictionary: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/kVariants.min.csv"));
37 let mut reader = csv::ReaderBuilder::new().has_headers(false).from_reader(dictionary);
38
39 let mut map: HashMap<char, KVariant> = HashMap::new();
40 for result in reader.deserialize() {
41 let line: TsvRow = result.unwrap();
42 let rhs = line.rhs.chars().next().unwrap();
43 let lhs = line.lhs.chars().next().unwrap();
44
45 if let Some(classification) = match line.relation.as_str() {
46 "wrong!" => Some(KVariantClass::Wrong),
47 "sem" => Some(KVariantClass::SementicVariant),
48 "simp" => Some(KVariantClass::Simplified),
49 "old" => Some(KVariantClass::Old),
50 "=" => Some(KVariantClass::Equal),
51 unexpected_classification => {
52 debug_assert!(
53 false,
54 "Unexpected classification {unexpected_classification:?} encountered. Consider handling or ignore explicaitly.",
55 );
56 None
57 }
58 } {
59 debug_assert!(
60 !map.contains_key(&lhs),
61 "Unexpected one source ideograph mapping to multiple destination ideographs.
62 If this happens in the future when we update kVariants.tsv, we would need to handle it
63 by, for example, deciding priorities for different classification types. "
64 );
65
66 map.insert(
67 lhs,
68 KVariant { source_ideograph: lhs, classification, destination_ideograph: rhs },
69 );
70 }
71 }
72
73 map
74});
75
#[cfg(test)]
mod test {
    use super::*;

    /// Checks one known dictionary entry per classification, then an ideograph that is a
    /// destination only and therefore must not be a key of the table.
    #[test]
    fn test_kvariants() {
        let expected_entries = [
            ('澚', KVariantClass::Wrong, '澳'),
            ('䀾', KVariantClass::SementicVariant, '䁈'),
            ('亚', KVariantClass::Simplified, '亞'),
            ('㮺', KVariantClass::Old, '本'),
            ('刄', KVariantClass::Equal, '刃'),
        ];

        for (source_ideograph, classification, destination_ideograph) in expected_entries {
            let expected = KVariant { source_ideograph, classification, destination_ideograph };
            assert_eq!(KVARIANTS.get(&source_ideograph), Some(&expected));
        }

        // '刃' is the destination of '刄' above; it must not appear as a source ideograph.
        assert_eq!(KVARIANTS.get(&'刃'), None);
    }

    /// For every entry whose destination ideograph is itself a source ideograph, checks that
    /// the destination's own destination differs from it.
    ///
    /// NOTE(review): as written, this comparison only rejects a destination that maps to
    /// itself; a two-step cycle (A -> B and B -> A) is not detected by it. Confirm that this
    /// is the intended meaning of "no loop".
    #[test]
    fn test_no_loop() {
        for variant in KVARIANTS.values() {
            // Entries whose destination is not a source ideograph need no further check.
            if let Some(reverse_lookup) = KVARIANTS.get(&variant.destination_ideograph) {
                assert_ne!(
                    variant.destination_ideograph,
                    reverse_lookup.destination_ideograph
                );
            }
        }
    }
}