1mod g2pw;
2mod jyutping_list;
3mod split;
4mod yue;
5
6use {
7 crate::{error::GSVError, text::get_phone_symbol},
8 log::{debug, warn},
9};
10pub use {
11 g2pw::{G2PW, G2PWOut},
12 split::split_zh_ph,
13};
14
/// Selects which grapheme-to-phoneme path [`ZhSentence::g2p`] takes.
///
/// The enum carries no data, so it is `Copy` and comparable; callers can keep
/// a mode value around and reuse it across several sentences.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ZhMode {
    /// Convert the text with the G2PW model passed to [`ZhSentence::g2p`].
    Mandarin,
    /// Convert the text with the `yue` module; the G2PW model is not used.
    Cantonese,
}
20
21#[derive(Debug, Default)]
22pub struct ZhSentence {
23 pub phone_ids: Vec<i64>,
24 pub phones: Vec<G2PWOut>,
25 pub word2ph: Vec<i32>,
26 pub text: String,
27}
28
29impl ZhSentence {
30 pub fn g2p(&mut self, g2pw: &mut G2PW, mode: ZhMode) {
32 match mode {
33 ZhMode::Mandarin => self.g2p_mandarin(g2pw),
34 ZhMode::Cantonese => self.g2p_cantonese(),
35 }
36 }
37
38 fn g2p_mandarin(&mut self, g2pw: &mut G2PW) {
40 let pinyin = g2pw.g2p(&self.text);
41 if pinyin.len() != self.text.chars().count() && !self.text.is_empty() {
42 warn!(
43 "Pinyin length mismatch: {} (pinyin) vs {} (text chars) for text '{}'",
44 pinyin.len(),
45 self.text.chars().count(),
46 self.text
47 );
48 }
49 self.phones = pinyin;
50 debug!("phones: {:?}", self.phones);
51 self.build_phone_id_and_word2ph();
52 }
53
54 fn g2p_cantonese(&mut self) {
56 let (pinyin, word2ph) = yue::g2p(&self.text);
57 debug!("pinyin: {:?}", pinyin);
58 self.phones = pinyin.into_iter().map(G2PWOut::Yue).collect();
59 self.build_phone_id_and_word2ph();
60 self.word2ph = word2ph; }
62
63 fn build_phone_id_and_word2ph(&mut self) {
65 self.phone_ids.clear();
66 self.word2ph.clear();
67 for p in &self.phones {
68 match p {
69 G2PWOut::Pinyin(p) => {
70 let (initial, final_) = split_zh_ph(p);
71 self.phone_ids.push(get_phone_symbol(initial));
72 if !final_.is_empty() {
73 self.phone_ids.push(get_phone_symbol(final_));
74 self.word2ph.push(2);
75 } else {
76 self.word2ph.push(1);
77 }
78 }
79 G2PWOut::Yue(c) => {
80 self.phone_ids.push(get_phone_symbol(c));
81 self.word2ph.push(2);
82 }
83 G2PWOut::RawChar(c) => {
84 self.phone_ids.push(get_phone_symbol(&c.to_string()));
85 self.word2ph.push(1);
86 }
87 }
88 }
89 debug!("phone_id {:?}", self.phone_ids);
90 }
91
92 pub fn build_phone(&self) -> Result<Vec<i64>, GSVError> {
94 Ok(self.phone_ids.clone())
95 }
96}