hanconv/
conv.rs

1use crate::Dictionary::*;
2use crate::trie::Trie;
3use std::sync::LazyLock;
4
/// The conversion presets offered by this module.
///
/// Each variant names an ordered list of dictionaries (see [`Converters::dictionaries`]);
/// [`Converters::new_converter`] wraps that list in a ready-to-use [`Converter`].
///
/// This is a plain field-less enum, so it derives the usual value-type traits: it can be
/// copied, compared, hashed and printed with `{:?}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Converters {
    /// Simplified Chinese → Traditional Chinese.
    S2T,
    /// Traditional Chinese → Simplified Chinese.
    T2S,
    /// Simplified Chinese → Traditional Chinese (Taiwan).
    S2TW,
    /// Traditional Chinese (Taiwan) → Simplified Chinese.
    TW2S,
    /// Simplified Chinese → Traditional Chinese (Taiwan), also converting to Taiwanese idiom.
    S2TWP,
    /// Traditional Chinese (Taiwan) → Simplified Chinese, also converting to Mainland Chinese
    /// idiom.
    TW2SP,
    /// Traditional Chinese → Traditional Chinese (Taiwan).
    T2TW,
    /// Traditional Chinese (Taiwan) → Traditional Chinese.
    TW2T,
    /// Simplified Chinese → Traditional Chinese (Hong Kong).
    S2HK,
    /// Traditional Chinese (Hong Kong) → Simplified Chinese.
    HK2S,
    /// Traditional Chinese → Traditional Chinese (Hong Kong).
    T2HK,
    /// Traditional Chinese (Hong Kong) → Traditional Chinese.
    HK2T,
    /// Traditional Chinese characters (Kyūjitai) → New Japanese Kanji (Shinjitai).
    T2JP,
    /// New Japanese Kanji (Shinjitai) → Traditional Chinese characters (Kyūjitai).
    JP2T,
}
63
/// Builds a `Trie<&'static str>` from one or more comma-separated dictionary expressions.
///
/// `.iter()` is called on every argument, the resulting iterators are chained in the order
/// the arguments are written, and the combined sequence is collected into the trie.
macro_rules! trie {
    ($head:expr $(, $tail:expr)*) => {
        $head
            .iter()
            $(.chain($tail.iter()))*
            .collect::<Trie<&'static str>>()
    };
}
69
70static S2T_DICT: LazyLock<Trie<&'static str>> = LazyLock::new(|| trie![STPhrases, STCharacters]);
71static T2S_DICT: LazyLock<Trie<&'static str>> = LazyLock::new(|| trie![TSPhrases, TSCharacters]);
72static T2TW_VARIANTS_DICT: LazyLock<Trie<&'static str>> = LazyLock::new(|| trie![TWVariants]);
73static T2TW_PHRASES_DICT: LazyLock<Trie<&'static str>> = LazyLock::new(|| trie![TWPhrases]);
74static TW2T_VARIANTS_DICT: LazyLock<Trie<&'static str>> =
75    LazyLock::new(|| trie![TWVariantsRevPhrases, TWVariantsRev]);
76static TW2T_PHRASES_VARIANTS_DICT: LazyLock<Trie<&'static str>> =
77    LazyLock::new(|| trie![TWPhrasesRev, TWVariantsRevPhrases, TWVariantsRev]);
78static T2HK_VARIANTS_DICT: LazyLock<Trie<&'static str>> = LazyLock::new(|| trie![HKVariants]);
79static HK2T_VARIANTS_DICT: LazyLock<Trie<&'static str>> =
80    LazyLock::new(|| trie![HKVariantsRevPhrases, HKVariantsRev]);
81static T2JP_VARIANTS_DICT: LazyLock<Trie<&'static str>> = LazyLock::new(|| trie![JPVariants]);
82static JP2T_DICT: LazyLock<Trie<&'static str>> =
83    LazyLock::new(|| trie![JPShinjitaiPhrases, JPShinjitaiCharacters, JPVariantsRev]);
84
85impl Converters {
86    pub fn new_converter(&self) -> Converter {
87        Converter::new(self.dictionaries())
88    }
89
90    pub fn dictionaries(&self) -> Vec<&'static Trie<&'static str>> {
91        match self {
92            Converters::S2T => vec![&*S2T_DICT],
93            Converters::S2TW => vec![&*S2T_DICT, &*T2TW_VARIANTS_DICT],
94            Converters::S2TWP => vec![&*S2T_DICT, &*T2TW_PHRASES_DICT, &*T2TW_VARIANTS_DICT],
95            Converters::T2S => vec![&*T2S_DICT],
96            Converters::T2TW => vec![&*T2TW_VARIANTS_DICT],
97            Converters::TW2S => vec![&*TW2T_VARIANTS_DICT, &*T2S_DICT],
98            Converters::TW2SP => vec![&*TW2T_PHRASES_VARIANTS_DICT, &*T2S_DICT],
99            Converters::TW2T => vec![&*TW2T_VARIANTS_DICT],
100            Converters::S2HK => vec![&*S2T_DICT, &*T2HK_VARIANTS_DICT],
101            Converters::HK2S => vec![&*HK2T_VARIANTS_DICT, &*T2S_DICT],
102            Converters::HK2T => vec![&*HK2T_VARIANTS_DICT],
103            Converters::T2HK => vec![&*T2HK_VARIANTS_DICT],
104            Converters::T2JP => vec![&*T2JP_VARIANTS_DICT],
105            Converters::JP2T => vec![&*JP2T_DICT],
106        }
107    }
108}
109
110pub struct Converter(Vec<&'static Trie<&'static str>>);
111
112impl Converter {
113    pub fn new(dictionaries: Vec<&'static Trie<&'static str>>) -> Self {
114        Self(dictionaries)
115    }
116
117    pub fn convert(&self, s: impl AsRef<str>) -> String {
118        match self.0.split_first() {
119            None => s.as_ref().to_string(),
120            Some((first, rest)) => {
121                let mut s = first.convert(s);
122
123                for trie in rest {
124                    s = trie.convert(&s);
125                }
126
127                s
128            }
129        }
130    }
131}
132
133/// Simplified Chinese to Traditional Chinese
134///
135/// 简体中文 → 繁体中文
136pub fn s2t(s: impl AsRef<str>) -> String {
137    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::S2T.new_converter());
138    CONVERTER.convert(s)
139}
140
141/// Traditional Chinese to Simplified Chinese
142///
143/// 繁体中文 → 简体中文
144pub fn t2s(s: impl AsRef<str>) -> String {
145    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::T2S.new_converter());
146    CONVERTER.convert(s)
147}
148
149/// Simplified Chinese to Traditional Chinese (Taiwan)
150///
151/// 简体中文 → 繁体中文(台湾)
152pub fn s2tw(s: impl AsRef<str>) -> String {
153    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::S2TW.new_converter());
154    CONVERTER.convert(s)
155}
156
157/// Traditional Chinese (Taiwan) to Simplified Chinese
158///
159/// 繁体中文(台湾)→ 简体中文
160pub fn tw2s(s: impl AsRef<str>) -> String {
161    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::TW2S.new_converter());
162    CONVERTER.convert(s)
163}
164
165/// Simplified Chinese to Traditional Chinese (Taiwan) with Taiwanese idiom
166///
167/// 简体中文 → 繁体中文(台湾),转换为台湾常用词
168pub fn s2twp(s: impl AsRef<str>) -> String {
169    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::S2TWP.new_converter());
170    CONVERTER.convert(s)
171}
172
173/// Traditional Chinese (Taiwan) to Simplified Chinese with Mainland Chinese idiom
174///
175/// 繁体中文(台湾)→ 简体中文,转换为中国大陆常用词
176pub fn tw2sp(s: impl AsRef<str>) -> String {
177    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::TW2SP.new_converter());
178    CONVERTER.convert(s)
179}
180
181/// Traditional Chinese to Traditional Chinese (Taiwan)
182///
183/// 繁体中文 → 繁体中文(台湾)
184pub fn t2tw(s: impl AsRef<str>) -> String {
185    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::T2TW.new_converter());
186    CONVERTER.convert(s)
187}
188
189/// Traditional Chinese (Taiwan) to Traditional Chinese
190///
191/// 繁体中文(台湾)→ 繁体中文
192pub fn tw2t(s: impl AsRef<str>) -> String {
193    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::TW2T.new_converter());
194    CONVERTER.convert(s)
195}
196
197/// Simplified Chinese to Traditional Chinese (Hong Kong)
198///
199/// 简体中文 → 繁体中文(香港)
200pub fn s2hk(s: impl AsRef<str>) -> String {
201    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::S2HK.new_converter());
202    CONVERTER.convert(s)
203}
204
205/// Traditional Chinese (Hong Kong) to Simplified Chinese
206///
207/// 繁体中文(香港)→ 简体中文
208pub fn hk2s(s: impl AsRef<str>) -> String {
209    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::HK2S.new_converter());
210    CONVERTER.convert(s)
211}
212
213/// Traditional Chinese to Traditional Chinese (Hong Kong)
214///
215/// 繁体中文 → 繁体中文(香港)
216pub fn t2hk(s: impl AsRef<str>) -> String {
217    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::T2HK.new_converter());
218    CONVERTER.convert(s)
219}
220
221/// Traditional Chinese (Hong Kong) to Traditional Chinese
222///
223/// 繁体中文(香港)→ 繁体中文
224pub fn hk2t(s: impl AsRef<str>) -> String {
225    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::HK2T.new_converter());
226    CONVERTER.convert(s)
227}
228
229/// Traditional Chinese characters (Kyūjitai) to New Japanese Kanji (Shinjitai)
230///
231/// 繁体字 → 日文新字体
232pub fn t2jp(s: impl AsRef<str>) -> String {
233    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::T2JP.new_converter());
234    CONVERTER.convert(s)
235}
236
237/// New Japanese Kanji (Shinjitai) to Traditional Chinese characters (Kyūjitai)
238///
239/// 日文新字体 → 繁体字
240pub fn jp2t(s: impl AsRef<str>) -> String {
241    static CONVERTER: LazyLock<Converter> = LazyLock::new(|| Converters::JP2T.new_converter());
242    CONVERTER.convert(s)
243}