jpreprocess-core 0.13.2

Japanese text preprocessor for Text-to-Speech applications (an OpenJTalk rewrite in Rust).
Documentation
use std::fmt::Display;

use super::{Mora, MoraEnum};

/// Converts a mora into its `(consonant, vowel)` phoneme pair.
///
/// Either slot may be `None`: some morae map to a vowel only, some to a
/// consonant-like symbol only, and `Touten` / `Question` map to nothing.
///
/// When `mora.is_voiced` is `false`, the vowel (if any) is replaced by the
/// result of [`Vowel::into_unvoiced`]; the consonant is never altered.
pub(crate) fn mora_to_phoneme(mora: &Mora) -> (Option<Consonant>, Option<Vowel>) {
    // Shorthands for the three tuple shapes used by the table below.
    let cv = |c: Consonant, v: Vowel| (Some(c), Some(v));
    let v_only = |v: Vowel| (None, Some(v));
    let c_only = |c: Consonant| (Some(c), None);

    let (consonant, voiced_vowel): (Option<Consonant>, Option<Vowel>) = match mora.mora_enum {
        MoraEnum::Vyo => cv(Consonant::By, Vowel::O),
        MoraEnum::Vyu => cv(Consonant::By, Vowel::U),
        MoraEnum::Vya => cv(Consonant::By, Vowel::A),
        MoraEnum::Vo => cv(Consonant::V, Vowel::O),
        MoraEnum::Ve => cv(Consonant::V, Vowel::E),
        MoraEnum::Vi => cv(Consonant::V, Vowel::I),
        MoraEnum::Va => cv(Consonant::V, Vowel::A),
        MoraEnum::Vu => cv(Consonant::V, Vowel::U),
        MoraEnum::N => c_only(Consonant::Nn),
        MoraEnum::Wo => v_only(Vowel::O),
        MoraEnum::We => v_only(Vowel::E),
        MoraEnum::Wi => v_only(Vowel::I),
        MoraEnum::Wa | MoraEnum::Xwa => cv(Consonant::W, Vowel::A),
        MoraEnum::Ro => cv(Consonant::R, Vowel::O),
        MoraEnum::Re => cv(Consonant::R, Vowel::E),
        MoraEnum::Ru => cv(Consonant::R, Vowel::U),
        MoraEnum::Ryo => cv(Consonant::Ry, Vowel::O),
        MoraEnum::Ryu => cv(Consonant::Ry, Vowel::U),
        MoraEnum::Rya => cv(Consonant::Ry, Vowel::A),
        MoraEnum::Rye => cv(Consonant::Ry, Vowel::E),
        MoraEnum::Ri => cv(Consonant::R, Vowel::I),
        MoraEnum::Ra => cv(Consonant::R, Vowel::A),
        MoraEnum::Yo | MoraEnum::Xyo => cv(Consonant::Y, Vowel::O),
        MoraEnum::Yu | MoraEnum::Xyu => cv(Consonant::Y, Vowel::U),
        MoraEnum::Ya | MoraEnum::Xya => cv(Consonant::Y, Vowel::A),
        MoraEnum::Mo => cv(Consonant::M, Vowel::O),
        MoraEnum::Me => cv(Consonant::M, Vowel::E),
        MoraEnum::Mu => cv(Consonant::M, Vowel::U),
        MoraEnum::Myo => cv(Consonant::My, Vowel::O),
        MoraEnum::Myu => cv(Consonant::My, Vowel::U),
        MoraEnum::Mya => cv(Consonant::My, Vowel::A),
        MoraEnum::Mye => cv(Consonant::My, Vowel::E),
        MoraEnum::Mi => cv(Consonant::M, Vowel::I),
        MoraEnum::Ma => cv(Consonant::M, Vowel::A),
        MoraEnum::Po => cv(Consonant::P, Vowel::O),
        MoraEnum::Bo => cv(Consonant::B, Vowel::O),
        MoraEnum::Ho => cv(Consonant::H, Vowel::O),
        MoraEnum::Pe => cv(Consonant::P, Vowel::E),
        MoraEnum::Be => cv(Consonant::B, Vowel::E),
        MoraEnum::He => cv(Consonant::H, Vowel::E),
        MoraEnum::Pu => cv(Consonant::P, Vowel::U),
        MoraEnum::Bu => cv(Consonant::B, Vowel::U),
        MoraEnum::Fo => cv(Consonant::F, Vowel::O),
        MoraEnum::Fe => cv(Consonant::F, Vowel::E),
        MoraEnum::Fi => cv(Consonant::F, Vowel::I),
        MoraEnum::Fa => cv(Consonant::F, Vowel::A),
        MoraEnum::Fu => cv(Consonant::F, Vowel::U),
        MoraEnum::Pyo => cv(Consonant::Py, Vowel::O),
        MoraEnum::Pyu => cv(Consonant::Py, Vowel::U),
        MoraEnum::Pya => cv(Consonant::Py, Vowel::A),
        MoraEnum::Pye => cv(Consonant::Py, Vowel::E),
        MoraEnum::Pi => cv(Consonant::P, Vowel::I),
        MoraEnum::Byo => cv(Consonant::By, Vowel::O),
        MoraEnum::Byu => cv(Consonant::By, Vowel::U),
        MoraEnum::Bya => cv(Consonant::By, Vowel::A),
        MoraEnum::Bye => cv(Consonant::By, Vowel::E),
        MoraEnum::Bi => cv(Consonant::B, Vowel::I),
        MoraEnum::Hyo => cv(Consonant::Hy, Vowel::O),
        MoraEnum::Hyu => cv(Consonant::Hy, Vowel::U),
        MoraEnum::Hya => cv(Consonant::Hy, Vowel::A),
        MoraEnum::Hye => cv(Consonant::Hy, Vowel::E),
        MoraEnum::Hi => cv(Consonant::H, Vowel::I),
        MoraEnum::Pa => cv(Consonant::P, Vowel::A),
        MoraEnum::Ba => cv(Consonant::B, Vowel::A),
        MoraEnum::Ha => cv(Consonant::H, Vowel::A),
        MoraEnum::No => cv(Consonant::N, Vowel::O),
        MoraEnum::Ne => cv(Consonant::N, Vowel::E),
        MoraEnum::Nu => cv(Consonant::N, Vowel::U),
        MoraEnum::Nyo => cv(Consonant::Ny, Vowel::O),
        MoraEnum::Nyu => cv(Consonant::Ny, Vowel::U),
        MoraEnum::Nya => cv(Consonant::Ny, Vowel::A),
        MoraEnum::Nye => cv(Consonant::Ny, Vowel::E),
        MoraEnum::Ni => cv(Consonant::N, Vowel::I),
        MoraEnum::Na => cv(Consonant::N, Vowel::A),
        MoraEnum::Dwu => cv(Consonant::D, Vowel::U),
        MoraEnum::Do => cv(Consonant::D, Vowel::O),
        MoraEnum::Twu => cv(Consonant::T, Vowel::U),
        MoraEnum::To => cv(Consonant::T, Vowel::O),
        MoraEnum::Dho => cv(Consonant::Dy, Vowel::O),
        MoraEnum::Dhu => cv(Consonant::Dy, Vowel::U),
        MoraEnum::Dha => cv(Consonant::Dy, Vowel::A),
        MoraEnum::Dhi => cv(Consonant::D, Vowel::I),
        MoraEnum::De => cv(Consonant::D, Vowel::E),
        MoraEnum::Tho => cv(Consonant::Ty, Vowel::O),
        MoraEnum::Thu => cv(Consonant::Ty, Vowel::U),
        MoraEnum::Tha => cv(Consonant::Ty, Vowel::A),
        MoraEnum::Thi => cv(Consonant::T, Vowel::I),
        MoraEnum::Te => cv(Consonant::T, Vowel::E),
        MoraEnum::Du => cv(Consonant::Z, Vowel::U),
        MoraEnum::Tso => cv(Consonant::Ts, Vowel::O),
        MoraEnum::Tse => cv(Consonant::Ts, Vowel::E),
        MoraEnum::Tsi => cv(Consonant::Ts, Vowel::I),
        MoraEnum::Tsa => cv(Consonant::Ts, Vowel::A),
        MoraEnum::Tsu => cv(Consonant::Ts, Vowel::U),
        MoraEnum::Xtsu => c_only(Consonant::Cl),
        MoraEnum::Di => cv(Consonant::J, Vowel::I),
        MoraEnum::Cho => cv(Consonant::Ch, Vowel::O),
        MoraEnum::Chu => cv(Consonant::Ch, Vowel::U),
        MoraEnum::Cha => cv(Consonant::Ch, Vowel::A),
        MoraEnum::Che => cv(Consonant::Ch, Vowel::E),
        MoraEnum::Chi => cv(Consonant::Ch, Vowel::I),
        MoraEnum::Da => cv(Consonant::D, Vowel::A),
        MoraEnum::Ta => cv(Consonant::T, Vowel::A),
        MoraEnum::Zo => cv(Consonant::Z, Vowel::O),
        MoraEnum::So => cv(Consonant::S, Vowel::O),
        MoraEnum::Ze => cv(Consonant::Z, Vowel::E),
        MoraEnum::Se => cv(Consonant::S, Vowel::E),
        MoraEnum::Zwi => cv(Consonant::Z, Vowel::I),
        MoraEnum::Zu => cv(Consonant::Z, Vowel::U),
        MoraEnum::Swi => cv(Consonant::S, Vowel::I),
        MoraEnum::Su => cv(Consonant::S, Vowel::U),
        MoraEnum::Jo => cv(Consonant::J, Vowel::O),
        MoraEnum::Ju => cv(Consonant::J, Vowel::U),
        MoraEnum::Ja => cv(Consonant::J, Vowel::A),
        MoraEnum::Je => cv(Consonant::J, Vowel::E),
        MoraEnum::Ji => cv(Consonant::J, Vowel::I),
        MoraEnum::Sho => cv(Consonant::Sh, Vowel::O),
        MoraEnum::Shu => cv(Consonant::Sh, Vowel::U),
        MoraEnum::Sha => cv(Consonant::Sh, Vowel::A),
        MoraEnum::She => cv(Consonant::Sh, Vowel::E),
        MoraEnum::Shi => cv(Consonant::Sh, Vowel::I),
        MoraEnum::Za => cv(Consonant::Z, Vowel::A),
        MoraEnum::Sa => cv(Consonant::S, Vowel::A),
        MoraEnum::Go => cv(Consonant::G, Vowel::O),
        MoraEnum::Ko => cv(Consonant::K, Vowel::O),
        MoraEnum::Ge => cv(Consonant::G, Vowel::E),
        MoraEnum::Ke | MoraEnum::Xke => cv(Consonant::K, Vowel::E),
        MoraEnum::Gwa => cv(Consonant::Gw, Vowel::A),
        MoraEnum::Gu => cv(Consonant::G, Vowel::U),
        MoraEnum::Kwa => cv(Consonant::Kw, Vowel::A),
        MoraEnum::Ku => cv(Consonant::K, Vowel::U),
        MoraEnum::Gyo => cv(Consonant::Gy, Vowel::O),
        MoraEnum::Gyu => cv(Consonant::Gy, Vowel::U),
        MoraEnum::Gya => cv(Consonant::Gy, Vowel::A),
        MoraEnum::Gye => cv(Consonant::Gy, Vowel::E),
        MoraEnum::Gi => cv(Consonant::G, Vowel::I),
        MoraEnum::Kyo => cv(Consonant::Ky, Vowel::O),
        MoraEnum::Kyu => cv(Consonant::Ky, Vowel::U),
        MoraEnum::Kya => cv(Consonant::Ky, Vowel::A),
        MoraEnum::Kye => cv(Consonant::Ky, Vowel::E),
        MoraEnum::Ki => cv(Consonant::K, Vowel::I),
        MoraEnum::Ga => cv(Consonant::G, Vowel::A),
        MoraEnum::Ka => cv(Consonant::K, Vowel::A),
        MoraEnum::O | MoraEnum::Xo => v_only(Vowel::O),
        MoraEnum::E | MoraEnum::Xe => v_only(Vowel::E),
        MoraEnum::Who => cv(Consonant::W, Vowel::O),
        MoraEnum::Whe => cv(Consonant::W, Vowel::E),
        MoraEnum::Whi => cv(Consonant::W, Vowel::I),
        MoraEnum::U | MoraEnum::Xu => v_only(Vowel::U),
        MoraEnum::Ye => cv(Consonant::Y, Vowel::E),
        MoraEnum::I | MoraEnum::Xi => v_only(Vowel::I),
        MoraEnum::A | MoraEnum::Xa => v_only(Vowel::A),

        MoraEnum::Long => c_only(Consonant::Long),
        MoraEnum::Touten | MoraEnum::Question => (None, None),
    };

    // The table always yields the voiced vowel; swap in the unvoiced form
    // afterwards when the mora is flagged as not voiced.
    let vowel = if mora.is_voiced {
        voiced_vowel
    } else {
        voiced_vowel.map(Vowel::into_unvoiced)
    };

    (consonant, vowel)
}

/// Consonant-slot phoneme of a mora.
///
/// Most variants are ordinary consonants that are followed by a vowel. The
/// last three (`Nn`, `Cl`, `Long`) are special symbols that
/// `mora_to_phoneme` emits with no vowel at all.
#[derive(Clone, Copy, Debug)]
pub enum Consonant {
    V,
    W,
    R,
    Ry,
    Y,
    M,
    My,
    P,
    B,
    H,
    F,
    Py,
    By,
    Hy,
    N,
    Ny,
    D,
    T,
    Dy,
    Ty,
    Ts,
    Ch,
    Z,
    S,
    J,
    Sh,
    G,
    K,
    Gy,
    Ky,
    Gw,
    Kw,

    // Consonants without vowel
    /// Produced for `MoraEnum::N`; displayed as `"N"`.
    Nn,
    /// Produced for `MoraEnum::Xtsu`; displayed as `"cl"`.
    Cl,

    // Consonant removed from output
    /// Produced for `MoraEnum::Long`; displayed as `"-"`.
    Long,
}

impl Display for Consonant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let s = match self {
            Self::V => "v",
            Self::W => "w",
            Self::R => "r",
            Self::Ry => "ry",
            Self::Y => "y",
            Self::M => "m",
            Self::My => "my",
            Self::P => "p",
            Self::B => "b",
            Self::H => "h",
            Self::F => "f",
            Self::Py => "py",
            Self::By => "by",
            Self::Hy => "hy",
            Self::N => "n",
            Self::Ny => "ny",
            Self::D => "d",
            Self::T => "t",
            Self::Dy => "dy",
            Self::Ty => "ty",
            Self::Ts => "ts",
            Self::Ch => "ch",
            Self::Z => "z",
            Self::S => "s",
            Self::J => "j",
            Self::Sh => "sh",
            Self::G => "g",
            Self::K => "k",
            Self::Gy => "gy",
            Self::Ky => "ky",
            Self::Gw => "gw",
            Self::Kw => "kw",

            Self::Nn => "N",
            Self::Cl => "cl",

            Self::Long => "-",
        };
        write!(f, "{}", s)
    }
}

/// Vowel-slot phoneme of a mora.
///
/// Each of the five vowels has a plain variant and an `*Unvoiced`
/// counterpart. `Display` writes the plain variants as lowercase letters and
/// the `*Unvoiced` variants as the matching uppercase letters.
#[derive(Clone, Copy, Debug)]
pub enum Vowel {
    // Plain vowels.
    A,
    I,
    U,
    E,
    O,
    // Unvoiced counterparts; `into_unvoiced` maps each plain vowel to one of these.
    AUnvoiced,
    IUnvoiced,
    UUnvoiced,
    EUnvoiced,
    OUnvoiced,
}

impl Vowel {
    pub fn into_unvoiced(self) -> Self {
        match self {
            Self::A => Self::AUnvoiced,
            Self::I => Self::IUnvoiced,
            Self::U => Self::UUnvoiced,
            Self::E => Self::EUnvoiced,
            Self::O => Self::OUnvoiced,
            rest => rest,
        }
    }
}

impl Display for Vowel {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let s = match self {
            Self::A => "a",
            Self::I => "i",
            Self::U => "u",
            Self::E => "e",
            Self::O => "o",
            Self::AUnvoiced => "A",
            Self::IUnvoiced => "I",
            Self::UUnvoiced => "U",
            Self::EUnvoiced => "E",
            Self::OUnvoiced => "O",
        };
        write!(f, "{}", s)
    }
}