piper-phoneme-streaming 0.1.0

A high-performance Rust library for streaming Text-to-Phoneme (G2P) conversion.
Documentation
use crate::TextUnit;
use crate::WordPhonemizer;
use crate::phoneme::PhonemeData;
use crate::phoneme::PhonemeTab;

/// Languages that can be selected for phonemization.
///
/// Each variant has a short textual code, available through [`Language::as_str`] and
/// accepted by the `TryFrom<&str>` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// English, code `"en"`.
    English,
    /// Vietnamese, code `"vi"`.
    Vietnamese,
}

impl Language {
    /// Every variant, in declaration order. Used to resolve a code back to a variant.
    const ALL: [Self; 2] = [Self::English, Self::Vietnamese];

    /// Returns the short code of this language (`"en"` or `"vi"`).
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Vietnamese => "vi",
            Self::English => "en",
        }
    }
}

impl TryFrom<&str> for Language {
    type Error = &'static str;

    /// Resolves a language code to its variant.
    ///
    /// The comparison is exact (case-sensitive, no trimming).
    ///
    /// # Errors
    ///
    /// Returns `"unsupported language"` when `value` is not the code of any variant.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        Self::ALL
            .iter()
            .copied()
            .find(|candidate| candidate.as_str() == value)
            .ok_or("unsupported language")
    }
}

/// Stress annotations that may appear inside a phoneme sequence.
///
/// The variants correspond to raw phoneme codes 2 through 8, in declaration order
/// (see `map_code`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StressMarker {
    /// Raw code 2.
    Unstressed,
    /// Raw code 3.
    StressDown,
    /// Raw code 4.
    Secondary,
    /// Raw code 5.
    Tertiary,
    /// Raw code 6.
    Primary,
    /// Raw code 7.
    PriorityPrimary,
    /// Raw code 8.
    Previous,
}

impl StressMarker {
    /// Returns the IPA text for this marker.
    ///
    /// Every marker currently renders as the empty string, so stress markers add nothing
    /// to IPA output.
    // NOTE(review): all-empty strings may mean non-ASCII stress marks were lost from the
    // literals at some point; confirm against the upstream source before relying on this.
    pub fn ipa(&self) -> &str {
        // Kept as an exhaustive pattern so a newly added variant forces a decision here.
        match self {
            Self::Unstressed
            | Self::StressDown
            | Self::Secondary
            | Self::Tertiary
            | Self::Primary
            | Self::PriorityPrimary
            | Self::Previous => "",
        }
    }
}

/// A single decoded element of a word's phoneme sequence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PhonemeToken {
    /// A phoneme, carried as text.
    Symbol(String),
    /// A stress annotation.
    Stress(StressMarker),
    /// A word boundary; rendered as a single space.
    WordBoundary,
    /// A raw code that was not decoded into any other variant; rendered as nothing.
    Control(u8),
}

impl PhonemeToken {
    /// Returns the IPA text contributed by this token.
    ///
    /// Symbols yield their own text, stress markers defer to [`StressMarker::ipa`], a word
    /// boundary yields `" "`, and control codes yield the empty string.
    pub fn ipa(&self) -> &str {
        match self {
            Self::Control(_) => "",
            Self::WordBoundary => " ",
            Self::Stress(marker) => marker.ipa(),
            Self::Symbol(text) => text.as_str(),
        }
    }
}

/// Per-word flag bits, stored as a raw `u32`.
///
/// The default value has no bits set.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct WordFlags(pub(crate) u32);

impl WordFlags {
    /// Bit position inspected by [`WordFlags::strend`].
    const STREND_BIT: u32 = 9;
    /// Bit position inspected by [`WordFlags::strend2`].
    const STREND2_BIT: u32 = 10;

    /// Returns the underlying flag word unchanged.
    pub fn raw(self) -> u32 {
        let Self(bits) = self;
        bits
    }

    /// Returns `true` when bit 9 of the flag word is set.
    // NOTE(review): presumably mirrors eSpeak NG's FLAG_STREND; confirm against the dictionary format.
    pub fn strend(self) -> bool {
        (self.0 >> Self::STREND_BIT) & 1 == 1
    }

    /// Returns `true` when bit 10 of the flag word is set.
    // NOTE(review): presumably mirrors eSpeak NG's FLAG_STREND2; confirm against the dictionary format.
    pub fn strend2(self) -> bool {
        (self.0 >> Self::STREND2_BIT) & 1 == 1
    }
}

/// Phonemization result for a single word.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WordPhoneme {
    /// Language given to the constructor.
    pub language: Language,
    /// Normalized form of the word, as given to the constructor.
    pub normalized_word: String,
    /// Decoded tokens for every raw code that precedes the first `0` byte.
    pub tokens: Vec<PhonemeToken>,
    /// Word-level flag bits.
    pub flags: WordFlags,
    /// Raw phoneme codes exactly as received, including the terminator and anything after it.
    raw_phonemes: Vec<u8>,
}

impl WordPhoneme {
    /// Builds a `WordPhoneme` from raw phoneme codes.
    ///
    /// `raw_phonemes` is treated as a `0`-terminated sequence: codes are decoded through
    /// `map_code` up to, but not including, the first `0` byte. The complete vector is
    /// stored unchanged and remains reachable through [`WordPhoneme::raw_codes`].
    pub(crate) fn from_raw(
        language: Language,
        normalized_word: String,
        raw_phonemes: Vec<u8>,
        flags: u32,
        phdata: &PhonemeData,
    ) -> Self {
        let tokens = raw_phonemes
            .iter()
            .copied()
            .take_while(|code| *code != 0)
            .map(|code| map_code(code, phdata))
            .collect();

        Self {
            language,
            normalized_word,
            tokens,
            flags: WordFlags(flags),
            raw_phonemes,
        }
    }

    /// Returns the codes that belong to the word: everything before the first `0` byte,
    /// or the whole buffer when no terminator is present.
    ///
    /// This is the same range `from_raw` decodes into `tokens`, so queries built on it
    /// cannot disagree with the token list because of bytes left after the terminator.
    fn terminated_codes(&self) -> &[u8] {
        let end = self
            .raw_phonemes
            .iter()
            .position(|&code| code == 0)
            .unwrap_or(self.raw_phonemes.len());
        &self.raw_phonemes[..end]
    }

    /// Returns `true` if the word contains raw code 6 or 7, which `map_code` decodes as
    /// `StressMarker::Primary` and `StressMarker::PriorityPrimary`.
    ///
    /// Only codes before the `0` terminator are considered.
    pub fn has_primary_stress(&self) -> bool {
        self.terminated_codes()
            .iter()
            .any(|&code| matches!(code, 6 | 7))
    }

    /// Returns `true` if the word contains raw code 4 or 5, which `map_code` decodes as
    /// `StressMarker::Secondary` and `StressMarker::Tertiary`.
    ///
    /// Only codes before the `0` terminator are considered.
    pub fn has_secondary_stress(&self) -> bool {
        self.terminated_codes()
            .iter()
            .any(|&code| matches!(code, 4 | 5))
    }

    /// Concatenates the IPA text of every token, in order.
    pub fn to_ipa(&self) -> String {
        self.tokens.iter().map(|token| token.ipa()).collect()
    }

    /// Returns the raw code buffer exactly as it was passed to the constructor,
    /// including the terminator and any bytes after it.
    #[doc(hidden)]
    pub fn raw_codes(&self) -> &[u8] {
        &self.raw_phonemes
    }
}

/// One element of a phonemized sentence, mirroring the variants of `TextUnit`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SentenceUnit {
    /// A word together with its phonemization.
    Word(WordPhoneme),
    /// Carried over from `TextUnit::Space`.
    Space,
    /// Carried over from `TextUnit::ClauseBoundary`, with its character.
    ClauseBoundary(char),
    /// Carried over from `TextUnit::Punctuation`, with its character.
    Punctuation(char),
}

impl SentenceUnit {
    /// Converts a `TextUnit` into the matching `SentenceUnit`.
    ///
    /// Words are run through `phonemizer`; every other unit is copied across unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `phonemizer.phonemize_word`.
    pub fn from_text_unit(
        unit: TextUnit,
        phonemizer: &WordPhonemizer,
    ) -> crate::error::Result<Self> {
        Ok(match unit {
            TextUnit::Space => Self::Space,
            TextUnit::ClauseBoundary(mark) => Self::ClauseBoundary(mark),
            TextUnit::Punctuation(mark) => Self::Punctuation(mark),
            // The word's language tag is deliberately unused here: the caller is expected
            // to pass the phonemizer that matches the word's language (PR 2).
            TextUnit::Word(text, _lang) => Self::Word(phonemizer.phonemize_word(&text)?),
        })
    }
}

/// Decodes one raw phoneme code into a [`PhonemeToken`].
///
/// Codes 2 through 8 are stress markers and code 15 is a word boundary. Any other code is
/// looked up in `phdata`: when an entry exists the token is a `Symbol` holding that entry's
/// mnemonic, otherwise the code is preserved as `Control(code)`.
fn map_code(code: u8, phdata: &PhonemeData) -> PhonemeToken {
    // Pick out the stress codes first; the two non-stress cases return directly.
    let marker = match code {
        2 => StressMarker::Unstressed,
        3 => StressMarker::StressDown,
        4 => StressMarker::Secondary,
        5 => StressMarker::Tertiary,
        6 => StressMarker::Primary,
        7 => StressMarker::PriorityPrimary,
        8 => StressMarker::Previous,
        15 => return PhonemeToken::WordBoundary,
        _ => {
            return phdata.get(code).map_or(PhonemeToken::Control(code), |entry| {
                PhonemeToken::Symbol(PhonemeTab::mnemonic_str(entry))
            });
        }
    };

    PhonemeToken::Stress(marker)
}