// gpt_sovits/text/zh.rs

1mod g2pw;
2mod jyutping_list;
3mod split;
4mod yue;
5
6use {
7    crate::{error::GSVError, text::get_phone_symbol},
8    log::{debug, warn},
9};
10pub use {
11    g2pw::{G2PW, G2PWOut},
12    split::split_zh_ph,
13};
14
/// Selects which grapheme-to-phoneme path `ZhSentence::g2p` runs.
///
/// The enum is a plain tag with no payload, so it is `Copy`: callers can pass
/// the same mode to several `g2p` calls (which take it by value) and compare
/// modes with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ZhMode {
    /// Convert the text through the G2PW model.
    Mandarin,
    /// Convert the text through the `yue` module.
    Cantonese,
}
20
21#[derive(Debug, Default)]
22pub struct ZhSentence {
23    pub phone_ids: Vec<i64>,
24    pub phones: Vec<G2PWOut>,
25    pub word2ph: Vec<i32>,
26    pub text: String,
27}
28
29impl ZhSentence {
30    /// Processes Chinese text into phonemes and phone IDs based on the specified mode.
31    pub fn g2p(&mut self, g2pw: &mut G2PW, mode: ZhMode) {
32        match mode {
33            ZhMode::Mandarin => self.g2p_mandarin(g2pw),
34            ZhMode::Cantonese => self.g2p_cantonese(),
35        }
36    }
37
38    /// Processes Mandarin text using the G2PW model.
39    fn g2p_mandarin(&mut self, g2pw: &mut G2PW) {
40        let pinyin = g2pw.g2p(&self.text);
41        if pinyin.len() != self.text.chars().count() && !self.text.is_empty() {
42            warn!(
43                "Pinyin length mismatch: {} (pinyin) vs {} (text chars) for text '{}'",
44                pinyin.len(),
45                self.text.chars().count(),
46                self.text
47            );
48        }
49        self.phones = pinyin;
50        debug!("phones: {:?}", self.phones);
51        self.build_phone_id_and_word2ph();
52    }
53
54    /// Processes Cantonese text using the yue module.
55    fn g2p_cantonese(&mut self) {
56        let (pinyin, word2ph) = yue::g2p(&self.text);
57        debug!("pinyin: {:?}", pinyin);
58        self.phones = pinyin.into_iter().map(G2PWOut::Yue).collect();
59        self.build_phone_id_and_word2ph();
60        self.word2ph = word2ph; // Override Pinnacle if Cantonese provides word2ph
61    }
62
63    /// Converts phonemes to phone IDs and generates word-to-phoneme mapping.
64    fn build_phone_id_and_word2ph(&mut self) {
65        self.phone_ids.clear();
66        self.word2ph.clear();
67        for p in &self.phones {
68            match p {
69                G2PWOut::Pinyin(p) => {
70                    let (initial, final_) = split_zh_ph(p);
71                    self.phone_ids.push(get_phone_symbol(initial));
72                    if !final_.is_empty() {
73                        self.phone_ids.push(get_phone_symbol(final_));
74                        self.word2ph.push(2);
75                    } else {
76                        self.word2ph.push(1);
77                    }
78                }
79                G2PWOut::Yue(c) => {
80                    self.phone_ids.push(get_phone_symbol(c));
81                    self.word2ph.push(2);
82                }
83                G2PWOut::RawChar(c) => {
84                    self.phone_ids.push(get_phone_symbol(&c.to_string()));
85                    self.word2ph.push(1);
86                }
87            }
88        }
89        debug!("phone_id {:?}", self.phone_ids);
90    }
91
92    /// Returns the phoneme IDs for the sentence.
93    pub fn build_phone(&self) -> Result<Vec<i64>, GSVError> {
94        Ok(self.phone_ids.clone())
95    }
96}