dodo_zh/
lib.rs

//! Dodo-zh is a crate which provides utility functions for working with pinyin and CEDICT files.
//! It enables you to do the following operations:
//! - Load a CEDICT file from a given path, with the keys being in either Simplified or Traditional Chinese
//!
//! It also supports several operations on a given pinyin, such as:
//! - convert a pinyin to zhuyin
//! - convert a pinyin to Wade-Giles
//! - convert a pinyin which has tone numbers (e.g. `wo3`) to a pinyin with tone marks (`wǒ`)
//! - convert a pinyin with tone marks to a pinyin with tone numbers
//! - convert a text between Simplified and Traditional Chinese
//! - detect the Chinese variant of a text
11use crate::error::Error;
12use cedict::Dictionary;
13use pinyin::accent::PinyinAccent;
14use pinyin::numbers::PinyinNumber;
15use std::path::PathBuf;
16use variant::KeyVariant;
17use wade_giles::WadeGiles;
18use zhuyin::Zhuyin;
19
20pub mod cedict;
21pub(crate) mod error;
22pub(crate) mod pinyin;
23pub mod variant;
24pub(crate) mod wade_giles;
25pub(crate) mod zhuyin;
26
// Separator placed between converted syllables when they are joined back into one string
28const SEPARATOR: &str = " ";
29
30/// Convert a sequence of pinyin with tone markers into zhuyin
31/// <div class="warning">Pinyin with tone number</div>
32///
33/// If you have a pinyin with numbers. You may first convert the pinyin to a tone markers with the [`self::convert_pinyin_tone_number_to_tone_mark`]
34///
35/// # Arguments
36///
37/// * `text` - S
38///
39/// # Examples
40///
41/// ```
42/// let zhuyin = dodo_zh::convert_pinyin_to_zhuyin("wǒ").unwrap();
43/// ```
44pub fn convert_pinyin_to_zhuyin<S>(text: S) -> Result<String, Error>
45where
46    S: AsRef<str> + Clone,
47{
48    let splitted_text = text.as_ref().split_whitespace().collect::<Vec<_>>();
49
50    let zh = Zhuyin::new()?;
51
52    let res = splitted_text
53        .into_iter()
54        .map(|content| zh.get_zhuyin_from_pinyin(content).into_owned())
55        .collect::<Vec<_>>()
56        .join(SEPARATOR);
57
58    Ok(res)
59}
60
61/// Convert a sequence of pinyin into wade giles
62/// <div class="warning">Pinyin with tone number</div>
63///
64/// If you have a pinyin with numbers. You may first convert the pinyin to a tone markers with the [`self::convert_pinyin_tone_number_to_tone_mark`]
65/// # Arguments
66///
67/// * `text` - S
68///
69/// # Examples
70///
71/// ```
72/// let wade = dodo_zh::convert_pinyin_to_wade_giles("wǒ").unwrap();
73/// ```
74pub fn convert_pinyin_to_wade_giles<S>(text: S) -> Result<String, Error>
75where
76    S: AsRef<str> + Clone,
77{
78    let splitted_text = text.as_ref().split_whitespace().collect::<Vec<_>>();
79
80    let res = splitted_text
81        .into_iter()
82        .map(|content| WadeGiles(content).convert_pinyin_to_wade_giles())
83        .collect::<Vec<_>>()
84        .join(SEPARATOR);
85
86    Ok(res)
87}
88
89/// Convert a sequence of pinyin with number to a pinyin tone mark
90///
91/// # Arguments
92///
93/// * `text` - S
94///
95/// # Examples
96///
97/// ```
98/// let pinyin_tone = dodo_zh::convert_pinyin_tone_number_to_tone_mark("wo3").unwrap();
99/// ```
100pub fn convert_pinyin_tone_number_to_tone_mark<S>(text: S) -> Result<String, Error>
101where
102    S: AsRef<str> + Clone,
103{
104    let splitted_text = text.as_ref().split_whitespace().collect::<Vec<_>>();
105
106    let res = splitted_text
107        .into_iter()
108        .filter_map(|content| PinyinAccent(content).replace_tone_numbers_with_tone_marks())
109        .collect::<Vec<_>>()
110        .join(SEPARATOR);
111
112    Ok(res)
113}
114
115/// Convert a sequence of pinyin with accent into a pinyin with number
116///
117/// # Arguments
118///
119/// * `text` - S
120///
121/// # Examples
122///
123/// ```
124/// let pinyin_number = dodo_zh::convert_pinyin_accent_to_pinyin_number("wǒ").unwrap();
125/// ```
126pub fn convert_pinyin_accent_to_pinyin_number<S>(text: S) -> Result<String, Error>
127where
128    S: AsRef<str> + Clone,
129{
130    let splitted_text = text.as_ref().split_whitespace().collect::<Vec<_>>();
131
132    let res = splitted_text
133        .into_iter()
134        .map(|content| PinyinNumber(content.to_string()).into_number())
135        .collect::<Vec<_>>()
136        .join(SEPARATOR);
137
138    Ok(res)
139}
140
141/// Load Cedict Dictionary
142///
143/// # Arguments
144///
145/// * `p` - PathBuf
146/// * `key_variant` - KeyVariant
147///
148/// # Examples
149///
150/// ```
151/// use dodo_zh::variant::KeyVariant;
152/// use std::path::PathBuf;
153///
154/// let dict = dodo_zh::load_cedict_dictionary(PathBuf::new(), KeyVariant::Traditional);
155/// ```
156pub fn load_cedict_dictionary(p: PathBuf, key_variant: KeyVariant) -> Result<Dictionary, Error> {
157    let dictionary = Dictionary::new(&p, key_variant)?;
158
159    Ok(dictionary)
160}
161
162/// Convert a chinese text to a desired variant (simplified <-> tradtional)
163///
164/// # Arguments
165///
166/// * `p` - PathBuf
167/// * `content` - S
168/// * `input_variant` - KeyVariant
169/// * `target_varaint` - KeyVariant
170///
171/// # Examples
172///
173/// ```
174/// use dodo_zh::variant::KeyVariant;
175/// use std::path::PathBuf;
176///
177/// let converted = dodo_zh::convert_text_to_desired_variant(PathBuf::new(), "大家好我是馬克的摯友", KeyVariant::Traditional, KeyVariant::Simplified);
178/// ```
179pub fn convert_text_to_desired_variant<S: AsRef<str>>(
180    p: PathBuf,
181    content: S,
182    input_variant: KeyVariant,
183    target_variant: KeyVariant,
184) -> Result<String, Error> {
185    variant::initialize_dictionaries(&p)?;
186
187    variant::KeyVariant::convert_text_to_desired_variant(content, input_variant, target_variant)
188        .ok_or_else(|| Error::Parse("Unable to convert content to target key variant".to_string()))
189}
190
191/// Detect which variant of chinese is the text. If the given path for the cedict dictionary is passed
192/// the detection will use the cedict. Otherwise it'll try to do the detection through unicode.
193/// ⚠️ Unicode detection isn't very accurate. It's recommended to use the cedict dictionary for a precise detection.
194///
195/// # Arguments
196///
197/// * `p` - PathBuf
198/// * `content` - S
199///
200/// # Examples
201///
202/// ```
203/// use dodo_zh::variant::KeyVariant;
204///
205/// let variant = dodo_zh::detect_which_variant(None, "今天是星期天. 我明天不要去公司工作");
206/// ```
207pub fn detect_which_variant<S: AsRef<str>>(
208    path: Option<PathBuf>,
209    content: S,
210) -> Result<KeyVariant, Error> {
211    match path {
212        Some(p) => {
213            variant::initialize_dictionaries(&p)?;
214            variant::KeyVariant::which_variant(content)
215                .ok_or_else(|| Error::Parse("Unable to detect chinese variant".to_string()))
216        }
217        None => Ok(variant::KeyVariant::detect_variant_with_unicode(content)),
218    }
219}