1use crate::TextUnit;
2use crate::WordPhonemizer;
3use crate::phoneme::PhonemeTab;
4use crate::phoneme::load::{ActiveTable, PhonemeData};
5
/// Languages that can be named by a short code in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// English, identified by the code `"en"`.
    English,
    /// Vietnamese, identified by the code `"vi"`.
    Vietnamese,
}

impl Language {
    /// Returns the short code for this language: `"en"` or `"vi"`.
    ///
    /// These are the same strings accepted by the `TryFrom<&str>` conversion,
    /// so a value round-trips through its code.
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::English => "en",
            Self::Vietnamese => "vi",
        }
    }
}

impl TryFrom<&str> for Language {
    type Error = &'static str;

    /// Parses a language code.
    ///
    /// The match is exact: no trimming or case folding is applied.
    ///
    /// # Errors
    ///
    /// Returns `"unsupported language"` for any input other than `"en"` or `"vi"`.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        match value {
            "en" => Ok(Self::English),
            "vi" => Ok(Self::Vietnamese),
            _ => Err("unsupported language"),
        }
    }
}
32
/// Stress annotation that can appear inside a phoneme sequence.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StressMarker {
    Unstressed,
    StressDown,
    Secondary,
    Tertiary,
    Primary,
    PriorityPrimary,
    Previous,
}

impl StressMarker {
    /// Returns the text this marker contributes to an IPA transcription.
    ///
    /// Every marker currently renders as the empty string, so stress is not
    /// visible in any transcription assembled from this method.
    pub fn ipa(&self) -> &str {
        // NOTE(review): all arms yield "". If IPA stress marks (e.g. U+02C8 for
        // primary, U+02CC for secondary) were intended here, confirm and fill
        // them in; the arms are kept separate so that is a one-line change each.
        match self {
            Self::Unstressed => "",
            Self::StressDown => "",
            Self::Secondary => "",
            Self::Tertiary => "",
            Self::Primary => "",
            Self::PriorityPrimary => "",
            Self::Previous => "",
        }
    }
}
57
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub enum PhonemeToken {
60 Symbol(String),
61 Stress(StressMarker),
62 WordBoundary,
63 Control(u8),
64}
65
66impl PhonemeToken {
67 pub fn ipa(&self) -> &str {
68 match self {
69 PhonemeToken::Symbol(s) => s.as_str(),
70 PhonemeToken::Stress(s) => s.ipa(),
71 PhonemeToken::WordBoundary => " ",
72 PhonemeToken::Control(_) => "",
73 }
74 }
75}
76
/// Bit flags attached to a word, stored as a raw `u32`.
///
/// The default value has no bits set.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct WordFlags(pub(crate) u32);

impl WordFlags {
    /// Mask for the bit reported by [`WordFlags::strend`] (bit 9).
    const STREND_MASK: u32 = 1 << 9;
    /// Mask for the bit reported by [`WordFlags::strend2`] (bit 10).
    const STREND2_MASK: u32 = 1 << 10;

    /// Returns the underlying flag word unchanged.
    pub const fn raw(self) -> u32 {
        self.0
    }

    /// Returns `true` when bit 9 of the flag word is set.
    pub const fn strend(self) -> bool {
        self.0 & Self::STREND_MASK != 0
    }

    /// Returns `true` when bit 10 of the flag word is set.
    pub const fn strend2(self) -> bool {
        self.0 & Self::STREND2_MASK != 0
    }
}
93
94#[derive(Debug, Clone, PartialEq, Eq)]
95pub struct WordPhoneme {
96 pub language: Language,
97 pub normalized_word: String,
98 pub tokens: Vec<PhonemeToken>,
99 pub flags: WordFlags,
100 raw_phonemes: Vec<u8>,
101}
102
103impl WordPhoneme {
104 pub(crate) fn from_raw(
105 language: Language,
106 normalized_word: String,
107 raw_phonemes: Vec<u8>,
108 flags: u32,
109 phdata: &PhonemeData,
110 ) -> Self {
111 let table = phdata.get_active_table(language.as_str()).ok();
112 let tokens = raw_phonemes
113 .iter()
114 .copied()
115 .take_while(|code| *code != 0)
116 .map(|code| map_code(code, phdata, table))
117 .collect();
118
119 Self {
120 language,
121 normalized_word,
122 tokens,
123 flags: WordFlags(flags),
124 raw_phonemes,
125 }
126 }
127
128 pub fn has_primary_stress(&self) -> bool {
129 self.raw_phonemes.iter().any(|&code| matches!(code, 6 | 7))
130 }
131
132 pub fn has_secondary_stress(&self) -> bool {
133 self.raw_phonemes.iter().any(|&code| matches!(code, 4 | 5))
134 }
135
136 pub fn to_ipa(&self) -> String {
137 self.tokens.iter().map(|token| token.ipa()).collect()
138 }
139
140 #[doc(hidden)]
141 pub fn raw_codes(&self) -> &[u8] {
142 &self.raw_phonemes
143 }
144}
145
146#[derive(Debug, Clone, PartialEq, Eq)]
147pub enum SentenceUnit {
148 Word(WordPhoneme),
149 Space,
150 ClauseBoundary(char),
151 Punctuation(char),
152}
153
154impl SentenceUnit {
155 pub fn from_text_unit(
156 unit: TextUnit,
157 phonemizer: &WordPhonemizer,
158 ) -> crate::error::Result<Self> {
159 let unit = match unit {
160 TextUnit::Word(word, _lang) => SentenceUnit::Word(phonemizer.phonemize_word(&word)?),
162 TextUnit::Space => SentenceUnit::Space,
163 TextUnit::ClauseBoundary(ch) => SentenceUnit::ClauseBoundary(ch),
164 TextUnit::Punctuation(ch) => SentenceUnit::Punctuation(ch),
165 };
166
167 Ok(unit)
168 }
169}
170
171fn map_code(code: u8, phdata: &PhonemeData, table: Option<&ActiveTable>) -> PhonemeToken {
172 match code {
173 2 => PhonemeToken::Stress(StressMarker::Unstressed),
174 3 => PhonemeToken::Stress(StressMarker::StressDown),
175 4 => PhonemeToken::Stress(StressMarker::Secondary),
176 5 => PhonemeToken::Stress(StressMarker::Tertiary),
177 6 => PhonemeToken::Stress(StressMarker::Primary),
178 7 => PhonemeToken::Stress(StressMarker::PriorityPrimary),
179 8 => PhonemeToken::Stress(StressMarker::Previous),
180 15 => PhonemeToken::WordBoundary,
181 _ => {
182 if let Some(at) = table {
183 phdata
184 .get(code, at)
185 .map(PhonemeTab::mnemonic_str)
186 .map(PhonemeToken::Symbol)
187 .unwrap_or(PhonemeToken::Control(code))
188 } else {
189 PhonemeToken::Control(code)
190 }
191 }
192 }
193}