piper-phoneme-streaming 0.1.1

A high-performance Rust library for streaming Text-to-Phoneme (G2P) conversion.
Documentation
use piper_phoneme_streaming::dictionary::{
    Dictionary, RULE_GROUP_END, RULE_PHONEMES, RULE_POST, RULE_PRE,
};
use std::path::PathBuf;

/// Dumps the pronunciation rules of each supported language to stdout.
///
/// This is a diagnostic test: it makes no assertions and only fails if
/// `dump_lang_rules` panics. Run with `--nocapture` to see the output.
#[test]
fn dump_rules() {
    ["en", "vi"].iter().copied().for_each(dump_lang_rules);
}

/// Prints every rule group of the `lang` dictionary to stdout.
///
/// The dictionary is looked up as `<lang>_dict` inside `vendor/espeak-ng-data`
/// (relative to the current working directory). When that file does not exist
/// a notice is printed and the function returns without doing anything else.
///
/// Single-byte groups are printed first, followed by two-byte groups. The raw
/// rule bytes of each group are rendered by `print_rules`.
///
/// # Panics
///
/// Panics if the dictionary file exists but `Dictionary::load` fails.
fn dump_lang_rules(lang: &str) {
    let data_dir = PathBuf::from("vendor/espeak-ng-data");
    if !data_dir.join(format!("{}_dict", lang)).exists() {
        println!("Dictionary for {} not found at {:?}", lang, data_dir);
        return;
    }

    let dict = Dictionary::load(lang, &data_dir).expect("Failed to load dict");

    println!("\n--- {} Rules ---", lang.to_uppercase());

    // Dump Group 1 (single-char groups): probe every possible byte key.
    for c in 0..=255u8 {
        if let Some(rules) = dict.group1(c) {
            // Key 0 is labelled "DEFAULT"; printable ASCII is shown quoted and
            // every other byte as hex so the output stays readable.
            let label = if c == 0 {
                "DEFAULT".to_string()
            } else if c.is_ascii_graphic() {
                format!("'{}'", c as char)
            } else {
                format!("0x{:x}", c)
            };
            println!("\nGroup {}:", label);
            print_rules(rules);
        }
    }

    // Dump Group 2 (two-char groups).
    for c in 0..=255u8 {
        for entry in dict.group2_entries_for(c) {
            let rules = dict.group2_rules(entry);
            // The key packs the group name: low byte first, high byte second.
            let first = (entry.key & 0xff) as u8;
            let second = (entry.key >> 8) as u8;
            println!("\nGroup '{}':", group2_label(first, second));
            print_rules(rules);
        }
    }
}

/// Builds a printable label for a two-byte group name.
///
/// The two bytes are shown as text when together they form valid UTF-8 without
/// control characters (two ASCII letters, or one two-byte encoded letter).
/// Otherwise each byte is rendered on its own: printable ASCII as the
/// character, anything else as `<0x..>`. Casting each byte to `char`
/// separately would print a multi-byte letter as two unrelated Latin-1
/// characters and could emit raw control bytes.
fn group2_label(first: u8, second: u8) -> String {
    let bytes = [first, second];
    match std::str::from_utf8(&bytes) {
        Ok(text) if !text.chars().any(|ch| ch.is_control()) => text.to_string(),
        _ => bytes
            .iter()
            .map(|&b| {
                if b.is_ascii_graphic() {
                    char::from(b).to_string()
                } else {
                    format!("<0x{:x}>", b)
                }
            })
            .collect(),
    }
}

fn print_rules(rules: &[u8]) {
    let mut pos = 0;
    while pos < rules.len() && rules[pos] != RULE_GROUP_END {
        let mut rule_str = String::new();
        let _start_pos = pos;

        // Very basic decoder for the binary rule format
        // A real decoder would be much more complex, but this shows the raw-ish data
        while pos < rules.len() && rules[pos] != 0 && rules[pos] != RULE_GROUP_END {
            let b = rules[pos];
            match b {
                RULE_PHONEMES => rule_str.push_str(" -> "),
                RULE_PRE => rule_str.push_str(" [PRE] "),
                RULE_POST => rule_str.push_str(" [POST] "),
                32..=126 => rule_str.push(b as char),
                _ => rule_str.push_str(&format!("<0x{:x}>", b)),
            }
            pos += 1;
        }

        if !rule_str.is_empty() {
            println!("  {}", rule_str);
        }

        if pos < rules.len() && rules[pos] == 0 {
            pos += 1; // skip null terminator of the rule
        } else {
            break;
        }
    }
}