piper-phoneme-streaming 0.1.1

A high-performance Rust library for streaming Text-to-Phoneme (G2P) conversion.
Documentation
use crate::expand_tasks::acronym::AcronymExpandTask;
use crate::expand_tasks::short_dict::ShortDictExpandTask;
use crate::text_expand::ExpandTask;

pub mod date;
pub mod hour;
pub mod number;

pub use date::ViDateExpandTask;
pub use hour::ViHourExpandTask;
pub use number::ViNumberExpandTask;

/// Builds the list of Vietnamese text-expansion tasks.
///
/// The returned vector holds five boxed tasks in this order: short dictionary,
/// acronym, hour, date, number.
pub fn get_vi_tasks() -> Vec<Box<dyn ExpandTask>> {
    // `include_str!` embeds the dictionary text at compile time (path is relative
    // to this source file). NOTE(review): presumably the embedded text is the
    // fallback when "data/short_dict_vi.txt" cannot be read at runtime — confirm
    // against `ShortDictExpandTask::load_from_file_or_default`.
    let short_dict: Box<dyn ExpandTask> =
        Box::new(ShortDictExpandTask::load_from_file_or_default(
            "data/short_dict_vi.txt",
            include_str!("../../../data/short_dict_vi.txt"),
        ));
    let acronym: Box<dyn ExpandTask> = Box::new(AcronymExpandTask);
    let hour: Box<dyn ExpandTask> = Box::new(ViHourExpandTask);
    let date: Box<dyn ExpandTask> = Box::new(ViDateExpandTask);
    let number: Box<dyn ExpandTask> = Box::new(ViNumberExpandTask);

    vec![short_dict, acronym, hour, date, number]
}

pub(crate) fn number_to_words_vi(n: u64) -> Vec<&'static str> {
    if n == 0 {
        return vec!["không"];
    }

    let mut result = Vec::new();

    let billions = n / 1_000_000_000;
    let millions = (n % 1_000_000_000) / 1_000_000;
    let thousands = (n % 1_000_000) / 1_000;
    let remainder = n % 1_000;

    if billions > 0 {
        result.extend(three_digits_to_words_vi(billions, false));
        result.push("tỷ");
    }

    if millions > 0 {
        result.extend(three_digits_to_words_vi(millions, !result.is_empty()));
        result.push("triệu");
    }

    if thousands > 0 {
        result.extend(three_digits_to_words_vi(thousands, !result.is_empty()));
        result.push("nghìn");
    }

    if remainder > 0 {
        result.extend(three_digits_to_words_vi(remainder, !result.is_empty()));
    }

    result
}

/// Spells out one three-digit group (`n` in `0..=999`) as Vietnamese words.
///
/// `has_prev` tells the function that a higher-order group has already been
/// emitted. In that case the hundreds position is always voiced, even when it
/// is zero ("không trăm").
///
/// Reading rules applied to the last two digits:
/// - `1..=9`: the digit word, preceded by "linh" when a hundreds part was emitted.
/// - `10..=19`: "mười" plus the unit, where 5 is read "lăm".
/// - `20..=99`: tens digit, "mươi", then the unit, where 1 is read "mốt",
///   4 is read "tư" and 5 is read "lăm".
///
/// Values above 999 are not supported: the hundreds digit is looked up with
/// `ones_vi`, which yields an empty string for anything above 9.
pub(crate) fn three_digits_to_words_vi(n: u64, has_prev: bool) -> Vec<&'static str> {
    let mut words = Vec::new();
    let hundreds = n / 100;
    let remainder = n % 100;

    if hundreds > 0 || has_prev {
        words.push(ones_vi(hundreds));
        words.push("trăm");
    }

    let tens = remainder / 10;
    let units = remainder % 10;

    match (tens, units) {
        // Nothing after the hundreds part (e.g. 100 -> "một trăm").
        (0, 0) => {}
        (0, unit) => {
            // "linh" bridges a spoken hundreds part and a lone unit digit.
            if !words.is_empty() {
                words.push("linh");
            }
            words.push(ones_vi(unit));
        }
        (1, unit) => {
            words.push("mười");
            match unit {
                0 => {}
                5 => words.push("lăm"),
                other => words.push(ones_vi(other)),
            }
        }
        (ten, unit) => {
            words.push(ones_vi(ten));
            words.push("mươi");
            match unit {
                0 => {}
                1 => words.push("mốt"),
                4 => words.push("tư"),
                5 => words.push("lăm"),
                other => words.push(ones_vi(other)),
            }
        }
    }

    words
}

/// Looks up the Vietnamese word for a single decimal digit.
///
/// Returns the empty string for any `n` greater than 9.
pub(crate) fn ones_vi(n: u64) -> &'static str {
    const DIGIT_WORDS: [&str; 10] = [
        "không", "một", "hai", "ba", "bốn", "năm", "sáu", "bảy", "tám", "chín",
    ];

    if n >= 10 {
        return "";
    }
    // `n` is below 10 here, so the cast cannot truncate on any target.
    DIGIT_WORDS[n as usize]
}