use crate::TextUnit;
use crate::WordPhonemizer;
use crate::phoneme::PhonemeTab;
use crate::phoneme::load::{ActiveTable, PhonemeData};
/// Languages known to this module, each identified by a short string code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// Identified by the code `"en"`.
    English,
    /// Identified by the code `"vi"`.
    Vietnamese,
}

impl Language {
    /// Every variant, in declaration order.
    ///
    /// Drives the string-to-language lookup, so a newly added variant must be
    /// listed here to become parseable.
    const ALL: [Language; 2] = [Language::English, Language::Vietnamese];

    /// Returns the short code for this language (`"en"` or `"vi"`).
    pub fn as_str(self) -> &'static str {
        match self {
            Language::English => "en",
            Language::Vietnamese => "vi",
        }
    }
}

impl TryFrom<&str> for Language {
    type Error = &'static str;

    /// Parses a language code produced by [`Language::as_str`].
    ///
    /// The comparison is exact (case-sensitive, no trimming).
    ///
    /// # Errors
    ///
    /// Returns `"unsupported language"` when `value` matches no known code.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        Language::ALL
            .iter()
            .copied()
            .find(|language| language.as_str() == value)
            .ok_or("unsupported language")
    }
}
/// Stress annotations that can appear in a decoded phoneme stream.
///
/// The raw codes noted on each variant are the ones `map_code` translates
/// into that variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StressMarker {
    /// Decoded from raw code 2.
    Unstressed,
    /// Decoded from raw code 3.
    StressDown,
    /// Decoded from raw code 4.
    Secondary,
    /// Decoded from raw code 5.
    Tertiary,
    /// Decoded from raw code 6.
    Primary,
    /// Decoded from raw code 7.
    PriorityPrimary,
    /// Decoded from raw code 8.
    Previous,
}

impl StressMarker {
    /// Returns the text this marker contributes to an IPA rendering.
    ///
    /// Every result is a string literal, so it is returned as `&'static str`
    /// (matching `Language::as_str`) rather than being tied to the borrow of
    /// `self`; callers may keep it after the marker is gone.
    // NOTE(review): every marker currently renders as the empty string, so
    // stress never shows up in IPA output. Confirm whether stress marks
    // (e.g. U+02C8 / U+02CC) were meant to be emitted here.
    pub fn ipa(&self) -> &'static str {
        match self {
            Self::Unstressed => "",
            Self::StressDown => "",
            Self::Secondary => "",
            Self::Tertiary => "",
            Self::Primary => "",
            Self::PriorityPrimary => "",
            Self::Previous => "",
        }
    }
}
/// One decoded element of a word's phoneme stream.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PhonemeToken {
    /// A phoneme, carried as its textual symbol.
    Symbol(String),
    /// A stress annotation.
    Stress(StressMarker),
    /// A boundary between words.
    WordBoundary,
    /// A raw code that was not decoded into any of the other variants.
    Control(u8),
}

impl PhonemeToken {
    /// Returns the text this token contributes to an IPA rendering.
    ///
    /// Symbols yield their stored text, stress markers delegate to
    /// `StressMarker::ipa`, a word boundary yields a single space, and
    /// control codes yield nothing.
    pub fn ipa(&self) -> &str {
        match self {
            Self::Control(_) => "",
            Self::WordBoundary => " ",
            Self::Stress(marker) => marker.ipa(),
            Self::Symbol(text) => text.as_str(),
        }
    }
}
/// Thin wrapper around the 32-bit flag word attached to a phonemized word.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct WordFlags(pub(crate) u32);

impl WordFlags {
    /// Mask for bit 9, reported by [`WordFlags::strend`].
    // NOTE(review): presumably mirrors eSpeak's FLAG_STREND; confirm.
    const STREND_MASK: u32 = 1 << 9;
    /// Mask for bit 10, reported by [`WordFlags::strend2`].
    // NOTE(review): presumably mirrors eSpeak's FLAG_STREND2; confirm.
    const STREND2_MASK: u32 = 1 << 10;

    /// Returns the underlying flag word unchanged.
    pub fn raw(self) -> u32 {
        self.0
    }

    /// Reports whether bit 9 of the flag word is set.
    pub fn strend(self) -> bool {
        self.any_set(Self::STREND_MASK)
    }

    /// Reports whether bit 10 of the flag word is set.
    pub fn strend2(self) -> bool {
        self.any_set(Self::STREND2_MASK)
    }

    /// True when at least one bit of `mask` is set in the flag word.
    fn any_set(self, mask: u32) -> bool {
        (self.0 & mask) != 0
    }
}
/// Phonemization result for a single word: the decoded tokens plus the raw
/// codes and flags they were built from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WordPhoneme {
    /// Language the word was phonemized for.
    pub language: Language,
    /// Word text as handed to the constructor.
    pub normalized_word: String,
    /// Decoded tokens, one per raw code up to (not including) the first `0`.
    pub tokens: Vec<PhonemeToken>,
    /// Flag word stored alongside the phonemes.
    pub flags: WordFlags,
    /// Raw codes exactly as received, including anything after the first `0`.
    raw_phonemes: Vec<u8>,
}

impl WordPhoneme {
    /// Builds a `WordPhoneme` by decoding `raw_phonemes` with `map_code`.
    ///
    /// Decoding stops at the first `0` code, which acts as a terminator. The
    /// complete `raw_phonemes` buffer is still stored and stays available
    /// through [`WordPhoneme::raw_codes`].
    pub(crate) fn from_raw(
        language: Language,
        normalized_word: String,
        raw_phonemes: Vec<u8>,
        flags: u32,
        phdata: &PhonemeData,
    ) -> Self {
        // A failed table lookup is deliberately not an error: decoding then
        // proceeds without a table and `map_code` chooses its own fallback.
        let table = phdata.get_active_table(language.as_str()).ok();
        let tokens: Vec<PhonemeToken> = Self::terminated(&raw_phonemes)
            .iter()
            .map(|&code| map_code(code, phdata, table))
            .collect();
        Self {
            language,
            normalized_word,
            tokens,
            flags: WordFlags(flags),
            raw_phonemes,
        }
    }

    /// Reports whether the word carries a primary stress code (6 or 7).
    ///
    /// Only codes before the first `0` terminator are considered, so the
    /// answer always agrees with what was decoded into `tokens`.
    pub fn has_primary_stress(&self) -> bool {
        Self::terminated(&self.raw_phonemes)
            .iter()
            .any(|&code| matches!(code, 6 | 7))
    }

    /// Reports whether the word carries a secondary or tertiary stress code
    /// (4 or 5).
    ///
    /// Only codes before the first `0` terminator are considered, so the
    /// answer always agrees with what was decoded into `tokens`.
    pub fn has_secondary_stress(&self) -> bool {
        Self::terminated(&self.raw_phonemes)
            .iter()
            .any(|&code| matches!(code, 4 | 5))
    }

    /// Concatenates the IPA text of every token, in order.
    pub fn to_ipa(&self) -> String {
        self.tokens.iter().map(|token| token.ipa()).collect()
    }

    /// Returns the raw code buffer exactly as it was supplied, including any
    /// bytes at or after the first `0` terminator.
    #[doc(hidden)]
    pub fn raw_codes(&self) -> &[u8] {
        &self.raw_phonemes
    }

    /// Returns the prefix of `codes` that precedes the first `0` code, or all
    /// of `codes` when no terminator is present.
    fn terminated(codes: &[u8]) -> &[u8] {
        let end = codes
            .iter()
            .position(|&code| code == 0)
            .unwrap_or(codes.len());
        &codes[..end]
    }
}
/// One element of a phonemized sentence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SentenceUnit {
    /// A word together with its phonemization.
    Word(WordPhoneme),
    /// Whitespace between other units.
    Space,
    /// A clause boundary, keeping the character that marked it.
    ClauseBoundary(char),
    /// A punctuation character.
    Punctuation(char),
}

impl SentenceUnit {
    /// Converts a [`TextUnit`] into the matching `SentenceUnit`.
    ///
    /// Words are phonemized through `phonemizer`; the language tag carried by
    /// `TextUnit::Word` is not consulted here. Every other unit maps straight
    /// onto the variant of the same name, keeping its character if it has one.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `phonemizer.phonemize_word`.
    pub fn from_text_unit(
        unit: TextUnit,
        phonemizer: &WordPhonemizer,
    ) -> crate::error::Result<Self> {
        Ok(match unit {
            TextUnit::Space => Self::Space,
            TextUnit::Punctuation(mark) => Self::Punctuation(mark),
            TextUnit::ClauseBoundary(mark) => Self::ClauseBoundary(mark),
            TextUnit::Word(text, _) => Self::Word(phonemizer.phonemize_word(&text)?),
        })
    }
}
/// Decodes one raw phoneme code into a [`PhonemeToken`].
///
/// Codes 2 through 8 are stress markers and 15 is a word boundary. Any other
/// code is looked up in `phdata` using `table`; it becomes a `Symbol` holding
/// the entry's mnemonic when the lookup succeeds, and a `Control` carrying the
/// raw code when the lookup fails or no table was supplied.
fn map_code(code: u8, phdata: &PhonemeData, table: Option<&ActiveTable>) -> PhonemeToken {
    let stress = match code {
        2 => StressMarker::Unstressed,
        3 => StressMarker::StressDown,
        4 => StressMarker::Secondary,
        5 => StressMarker::Tertiary,
        6 => StressMarker::Primary,
        7 => StressMarker::PriorityPrimary,
        8 => StressMarker::Previous,
        15 => return PhonemeToken::WordBoundary,
        _ => {
            // Not a reserved code: it is only meaningful relative to a table.
            return match table {
                Some(active) => phdata
                    .get(code, active)
                    .map(|entry| PhonemeToken::Symbol(PhonemeTab::mnemonic_str(entry)))
                    .unwrap_or(PhonemeToken::Control(code)),
                None => PhonemeToken::Control(code),
            };
        }
    };
    PhonemeToken::Stress(stress)
}