dodo_zh/lib.rs
//! Dodo-zh is a crate which provides utility functions for working with pinyin and CEDICT files.
//! It enables you to do the following operations:
//! - load a CEDICT file from a given path, with the keys being either in Simplified or Traditional Chinese
//!
//! It also supports several operations on a given pinyin, such as:
//! - convert a pinyin to zhuyin
//! - convert a pinyin to Wade-Giles
//! - convert a pinyin with tone numbers (e.g. `wo3`) to a pinyin with tone marks (`wǒ`)
//! - convert a pinyin with tone marks to a pinyin with tone numbers
//! - convert a text between Simplified and Traditional Chinese
//! - detect the Chinese variant of a text
11use crate::error::Error;
12use cedict::Dictionary;
13use pinyin::accent::PinyinAccent;
14use pinyin::numbers::PinyinNumber;
15use std::path::PathBuf;
16use variant::KeyVariant;
17use wade_giles::WadeGiles;
18use zhuyin::Zhuyin;
19
20pub mod cedict;
21pub(crate) mod error;
22pub(crate) mod pinyin;
23pub mod variant;
24pub(crate) mod wade_giles;
25pub(crate) mod zhuyin;
26
/// Separator placed between the converted syllables when a conversion result is re-assembled.
const SEPARATOR: &str = " ";
29
30/// Convert a sequence of pinyin with tone markers into zhuyin
31/// <div class="warning">Pinyin with tone number</div>
32///
33/// If you have a pinyin with numbers. You may first convert the pinyin to a tone markers with the [`self::convert_pinyin_tone_number_to_tone_mark`]
34///
35/// # Arguments
36///
37/// * `text` - S
38///
39/// # Examples
40///
41/// ```
42/// let zhuyin = dodo_zh::convert_pinyin_to_zhuyin("wǒ").unwrap();
43/// ```
44pub fn convert_pinyin_to_zhuyin<S>(text: S) -> Result<String, Error>
45where
46 S: AsRef<str> + Clone,
47{
48 let splitted_text = text.as_ref().split_whitespace().collect::<Vec<_>>();
49
50 let zh = Zhuyin::new()?;
51
52 let res = splitted_text
53 .into_iter()
54 .map(|content| zh.get_zhuyin_from_pinyin(content).into_owned())
55 .collect::<Vec<_>>()
56 .join(SEPARATOR);
57
58 Ok(res)
59}
60
61/// Convert a sequence of pinyin into wade giles
62/// <div class="warning">Pinyin with tone number</div>
63///
64/// If you have a pinyin with numbers. You may first convert the pinyin to a tone markers with the [`self::convert_pinyin_tone_number_to_tone_mark`]
65/// # Arguments
66///
67/// * `text` - S
68///
69/// # Examples
70///
71/// ```
72/// let wade = dodo_zh::convert_pinyin_to_wade_giles("wǒ").unwrap();
73/// ```
74pub fn convert_pinyin_to_wade_giles<S>(text: S) -> Result<String, Error>
75where
76 S: AsRef<str> + Clone,
77{
78 let splitted_text = text.as_ref().split_whitespace().collect::<Vec<_>>();
79
80 let res = splitted_text
81 .into_iter()
82 .map(|content| WadeGiles(content).convert_pinyin_to_wade_giles())
83 .collect::<Vec<_>>()
84 .join(SEPARATOR);
85
86 Ok(res)
87}
88
89/// Convert a sequence of pinyin with number to a pinyin tone mark
90///
91/// # Arguments
92///
93/// * `text` - S
94///
95/// # Examples
96///
97/// ```
98/// let pinyin_tone = dodo_zh::convert_pinyin_tone_number_to_tone_mark("wo3").unwrap();
99/// ```
100pub fn convert_pinyin_tone_number_to_tone_mark<S>(text: S) -> Result<String, Error>
101where
102 S: AsRef<str> + Clone,
103{
104 let splitted_text = text.as_ref().split_whitespace().collect::<Vec<_>>();
105
106 let res = splitted_text
107 .into_iter()
108 .filter_map(|content| PinyinAccent(content).replace_tone_numbers_with_tone_marks())
109 .collect::<Vec<_>>()
110 .join(SEPARATOR);
111
112 Ok(res)
113}
114
115/// Convert a sequence of pinyin with accent into a pinyin with number
116///
117/// # Arguments
118///
119/// * `text` - S
120///
121/// # Examples
122///
123/// ```
124/// let pinyin_number = dodo_zh::convert_pinyin_accent_to_pinyin_number("wǒ").unwrap();
125/// ```
126pub fn convert_pinyin_accent_to_pinyin_number<S>(text: S) -> Result<String, Error>
127where
128 S: AsRef<str> + Clone,
129{
130 let splitted_text = text.as_ref().split_whitespace().collect::<Vec<_>>();
131
132 let res = splitted_text
133 .into_iter()
134 .map(|content| PinyinNumber(content.to_string()).into_number())
135 .collect::<Vec<_>>()
136 .join(SEPARATOR);
137
138 Ok(res)
139}
140
141/// Load Cedict Dictionary
142///
143/// # Arguments
144///
145/// * `p` - PathBuf
146/// * `key_variant` - KeyVariant
147///
148/// # Examples
149///
150/// ```
151/// use dodo_zh::variant::KeyVariant;
152/// use std::path::PathBuf;
153///
154/// let dict = dodo_zh::load_cedict_dictionary(PathBuf::new(), KeyVariant::Traditional);
155/// ```
156pub fn load_cedict_dictionary(p: PathBuf, key_variant: KeyVariant) -> Result<Dictionary, Error> {
157 let dictionary = Dictionary::new(&p, key_variant)?;
158
159 Ok(dictionary)
160}
161
162/// Convert a chinese text to a desired variant (simplified <-> tradtional)
163///
164/// # Arguments
165///
166/// * `p` - PathBuf
167/// * `content` - S
168/// * `input_variant` - KeyVariant
169/// * `target_varaint` - KeyVariant
170///
171/// # Examples
172///
173/// ```
174/// use dodo_zh::variant::KeyVariant;
175/// use std::path::PathBuf;
176///
177/// let converted = dodo_zh::convert_text_to_desired_variant(PathBuf::new(), "大家好我是馬克的摯友", KeyVariant::Traditional, KeyVariant::Simplified);
178/// ```
179pub fn convert_text_to_desired_variant<S: AsRef<str>>(
180 p: PathBuf,
181 content: S,
182 input_variant: KeyVariant,
183 target_variant: KeyVariant,
184) -> Result<String, Error> {
185 variant::initialize_dictionaries(&p)?;
186
187 variant::KeyVariant::convert_text_to_desired_variant(content, input_variant, target_variant)
188 .ok_or_else(|| Error::Parse("Unable to convert content to target key variant".to_string()))
189}
190
191/// Detect which variant of chinese is the text. If the given path for the cedict dictionary is passed
192/// the detection will use the cedict. Otherwise it'll try to do the detection through unicode.
193/// ⚠️ Unicode detection isn't very accurate. It's recommended to use the cedict dictionary for a precise detection.
194///
195/// # Arguments
196///
197/// * `p` - PathBuf
198/// * `content` - S
199///
200/// # Examples
201///
202/// ```
203/// use dodo_zh::variant::KeyVariant;
204///
205/// let variant = dodo_zh::detect_which_variant(None, "今天是星期天. 我明天不要去公司工作");
206/// ```
207pub fn detect_which_variant<S: AsRef<str>>(
208 path: Option<PathBuf>,
209 content: S,
210) -> Result<KeyVariant, Error> {
211 match path {
212 Some(p) => {
213 variant::initialize_dictionaries(&p)?;
214 variant::KeyVariant::which_variant(content)
215 .ok_or_else(|| Error::Parse("Unable to detect chinese variant".to_string()))
216 }
217 None => Ok(variant::KeyVariant::detect_variant_with_unicode(content)),
218 }
219}