//! piper-phoneme-streaming 0.1.1
//!
//! A high-performance Rust library for streaming Text-to-Phoneme (G2P) conversion.
//!
//! Documentation
use crate::expand_tasks::acronym::AcronymExpandTask;
use crate::expand_tasks::short_dict::ShortDictExpandTask;
use crate::text_expand::ExpandTask;

pub mod date;
pub mod decimal;
pub mod hour;
pub mod negative;
pub mod number;
pub mod percent;

pub use date::EnDateExpandTask;
pub use decimal::EnDecimalExpandTask;
pub use hour::EnHourExpandTask;
pub use negative::EnNegativeExpandTask;
pub use number::EnNumberExpandTask;
pub use percent::EnPercentExpandTask;

/// Builds the list of English text-expansion tasks.
///
/// The returned vector holds, in this order: the short-dictionary task, the
/// acronym task, then the negative, hour, date, percent, decimal and number
/// tasks. NOTE(review): callers presumably run the tasks in vector order, so
/// the ordering is kept exactly as-is; confirm against the expansion driver.
///
/// The short dictionary is constructed from the path
/// `data/short_dict_en.txt` together with a copy of that file embedded at
/// compile time via `include_str!` (presumably used as the fallback when the
/// file cannot be read; see `ShortDictExpandTask::load_from_file_or_default`).
pub fn get_en_tasks() -> Vec<Box<dyn ExpandTask>> {
    let short_dict = ShortDictExpandTask::load_from_file_or_default(
        "data/short_dict_en.txt",
        include_str!("../../../data/short_dict_en.txt"),
    );

    vec![
        Box::new(short_dict),
        Box::new(AcronymExpandTask),
        Box::new(EnNegativeExpandTask),
        Box::new(EnHourExpandTask),
        Box::new(EnDateExpandTask),
        Box::new(EnPercentExpandTask),
        Box::new(EnDecimalExpandTask),
        Box::new(EnNumberExpandTask),
    ]
}

/// Spells out `n` as a sequence of English words (short scale).
///
/// The number is split into groups of three decimal digits; each non-zero
/// group is rendered with [`three_digits_to_words_en`] and followed by its
/// scale word (`"thousand"`, `"million"`, ... up to `"quintillion"`, which is
/// enough to cover the whole `u64` range). Zero is rendered as `["zero"]`.
///
/// An `"and"` is inserted before the final group when that group is below
/// one hundred and at least one larger group precedes it, e.g.
/// `1_000_005` -> `["one", "million", "and", "five"]`.
pub(crate) fn number_to_words_en(n: u64) -> Vec<&'static str> {
    // Largest scale first. 10^18 is the highest power of 1000 that fits in a
    // u64, so every group computed below is guaranteed to be in 0..=999.
    const SCALES: [(u64, &str); 6] = [
        (1_000_000_000_000_000_000, "quintillion"),
        (1_000_000_000_000_000, "quadrillion"),
        (1_000_000_000_000, "trillion"),
        (1_000_000_000, "billion"),
        (1_000_000, "million"),
        (1_000, "thousand"),
    ];

    if n == 0 {
        return vec!["zero"];
    }

    let mut result = Vec::new();
    let mut rest = n;

    for &(scale, scale_word) in &SCALES {
        let group = rest / scale;
        rest %= scale;
        if group > 0 {
            result.extend(three_digits_to_words_en(group));
            result.push(scale_word);
        }
    }

    // `rest` is now the lowest three digits (0..=999).
    if rest > 0 {
        if !result.is_empty() && rest < 100 {
            result.push("and");
        }
        result.extend(three_digits_to_words_en(rest));
    }

    result
}

/// Spells out a single three-digit group as English words.
///
/// Produces `"<digit> hundred"` for the hundreds place, an `"and"` when both
/// a hundreds part and a lower part are present, then the part below one
/// hundred (a single word for 1..=19, otherwise a tens word optionally
/// followed by a units word). `0` yields an empty vector.
///
/// The hundreds count is passed straight to [`ones_en`], so inputs are
/// expected to be in `0..=999`; larger values are not split any further.
pub(crate) fn three_digits_to_words_en(n: u64) -> Vec<&'static str> {
    let (hundreds_digit, below_hundred) = (n / 100, n % 100);
    let mut words = Vec::new();

    if hundreds_digit != 0 {
        words.extend([ones_en(hundreds_digit), "hundred"]);
        if below_hundred != 0 {
            words.push("and");
        }
    }

    match below_hundred {
        0 => {}
        // 1..=19 each have their own single word.
        1..=19 => words.push(ones_en(below_hundred)),
        _ => {
            words.push(tens_en(below_hundred / 10));
            let unit = below_hundred % 10;
            if unit != 0 {
                words.push(ones_en(unit));
            }
        }
    }

    words
}

/// Returns the English cardinal word for `n` in `1..=19`.
///
/// Any other value (including `0`) yields the empty string.
pub(crate) fn ones_en(n: u64) -> &'static str {
    // Entry `i` holds the word for the value `i + 1`.
    const WORDS: [&str; 19] = [
        "one",
        "two",
        "three",
        "four",
        "five",
        "six",
        "seven",
        "eight",
        "nine",
        "ten",
        "eleven",
        "twelve",
        "thirteen",
        "fourteen",
        "fifteen",
        "sixteen",
        "seventeen",
        "eighteen",
        "nineteen",
    ];

    match n {
        // The range guard keeps the index within 0..=18, so the cast is lossless.
        1..=19 => WORDS[(n - 1) as usize],
        _ => "",
    }
}

/// Returns the English word for the tens digit `n` in `2..=9`
/// (`2` -> `"twenty"`, ..., `9` -> `"ninety"`).
///
/// Any other value yields the empty string.
pub(crate) fn tens_en(n: u64) -> &'static str {
    // Entry `i` holds the word for the tens digit `i + 2`.
    const WORDS: [&str; 8] = [
        "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
    ];

    match n {
        // The range guard keeps the index within 0..=7, so the cast is lossless.
        2..=9 => WORDS[(n - 2) as usize],
        _ => "",
    }
}

/// Returns the English ordinal for `n` in `1..=31` as a list of words,
/// e.g. `3` -> `["third"]`, `21` -> `["twenty", "first"]`.
///
/// Any value outside `1..=31` (including `0`) yields an empty vector.
pub(crate) fn ordinal_words_en(n: u32) -> Vec<&'static str> {
    // Entry `i` holds the ordinal word for the value `i + 1`.
    const ORDINALS: [&str; 19] = [
        "first",
        "second",
        "third",
        "fourth",
        "fifth",
        "sixth",
        "seventh",
        "eighth",
        "ninth",
        "tenth",
        "eleventh",
        "twelfth",
        "thirteenth",
        "fourteenth",
        "fifteenth",
        "sixteenth",
        "seventeenth",
        "eighteenth",
        "nineteenth",
    ];

    match n {
        1..=19 => vec![ORDINALS[(n - 1) as usize]],
        20 => vec!["twentieth"],
        // 21..=29: "twenty" followed by the ordinal of the units digit.
        21..=29 => vec!["twenty", ORDINALS[(n - 21) as usize]],
        30 => vec!["thirtieth"],
        31 => vec!["thirty", "first"],
        _ => Vec::new(),
    }
}