1mod g2p_en;
2
3pub use g2p_en::*;
4use {
5 crate::{error::GSVError, text::get_phone_symbol},
6 log::debug,
7 std::borrow::Cow,
8};
9
/// One token of an English sentence.
///
/// A token is either a word whose text is owned by the token, or a
/// punctuation mark referring to a `'static` string.
#[derive(Clone, PartialEq, Eq)]
pub enum EnWord {
    /// A word, stored as an owned string.
    Word(String),
    /// A punctuation mark, stored as a borrowed static string.
    Punctuation(&'static str),
}
15
16impl std::fmt::Debug for EnWord {
17 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
18 match self {
19 EnWord::Word(w) => write!(f, "\"{}\"", w),
20 EnWord::Punctuation(p) => write!(f, "\"{}\"", p),
21 }
22 }
23}
24
25#[derive(Debug, Default)]
26pub struct EnSentence {
27 pub phone_ids: Vec<i64>,
28 pub phones: Vec<Cow<'static, str>>,
29 pub word2ph: Vec<i32>,
30 pub text: Vec<EnWord>,
31}
32
33impl EnSentence {
34 pub fn g2p(&mut self, g2p_en: &mut G2pEn) -> Result<(), GSVError> {
35 self.phones.clear();
36 self.phone_ids.clear();
37 self.word2ph.clear();
38 for word in &self.text {
39 match word {
40 EnWord::Word(w) => {
41 let phonemes = g2p_en.g2p(w)?;
42 let mut cnt = 0;
43 for ph in phonemes {
44 self.phones.push(Cow::Owned(ph.clone()));
45 self.phone_ids.push(get_phone_symbol(&ph));
46 cnt += 1;
47 if ph.contains("0")
48 || ph.contains("1")
49 || ph.contains("2")
50 || ph.contains("3")
51 || ph.contains("4")
52 {
53 self.word2ph.push(cnt);
54 cnt = 0;
55 }
56 }
57 if cnt > 0 {
58 self.word2ph.push(cnt);
59 }
60 }
61 EnWord::Punctuation(p) => {
62 self.phones.push(Cow::Borrowed(p));
63 self.phone_ids.push(get_phone_symbol(p));
64 self.word2ph.push(1);
65 }
66 };
67 }
68 debug!("EnSentence phones: {:?}", self.phones);
69 debug!("EnSentence phone_ids: {:?}", self.phone_ids);
70 debug!("EnSentence word2ph: {:?}", self.word2ph);
71 Ok(())
72 }
73
74 pub fn build_phone(&self) -> Result<Vec<i64>, GSVError> {
75 Ok(self.phone_ids.clone())
76 }
77
78 pub fn get_text_string(&self) -> String {
79 let mut result = String::with_capacity(self.text.len() * 5); for w in &self.text {
81 match w {
82 EnWord::Word(s) => result.push_str(s),
83 EnWord::Punctuation(p) => result.push_str(p),
84 }
85 }
86 result
87 }
88}