use super::super::types::{ExecutorResult, PreprocessedData};
use crate::execution::template::{PhonemizerBackend, TokenizerType};
use crate::runtime_adapter::AdapterError;
/// Tokenizes plain text with a tokenizer definition loaded from `tokenizer_path`.
///
/// # Arguments
/// * `data` - Must be [`PreprocessedData::Text`]; every other variant is rejected.
/// * `tokenizer_path` - File handed to `Tokenizer::from_file`. It is loaded on every call.
/// * `tokenizer_type` - `WordPiece` and `BPE` are loaded from `tokenizer_path`;
///   `SentencePiece` is rejected as not yet implemented.
/// * `max_length` - When set, the IDs, attention mask and token type IDs are cut down to at
///   most this many entries. Shorter encodings are left as they are (no padding is added).
///
/// # Returns
/// [`PreprocessedData::TokenIds`] holding the encoded sequences, the tokenizer path
/// (as `vocab_file`) and the original text.
///
/// # Errors
/// Returns [`AdapterError::InvalidInput`] when `data` is not text, the tokenizer type is
/// unsupported, the tokenizer file cannot be loaded, or encoding fails.
pub fn tokenize_step(
    data: PreprocessedData,
    tokenizer_path: &str,
    tokenizer_type: &TokenizerType,
    max_length: Option<usize>,
) -> ExecutorResult<PreprocessedData> {
    use tokenizers::Tokenizer;

    let PreprocessedData::Text(text) = data else {
        return Err(AdapterError::InvalidInput(
            "Tokenize requires text input".to_string(),
        ));
    };

    let tokenizer = match tokenizer_type {
        TokenizerType::WordPiece | TokenizerType::BPE => Tokenizer::from_file(tokenizer_path)
            .map_err(|e| {
                AdapterError::InvalidInput(format!(
                    "Failed to load tokenizer from {}: {}",
                    tokenizer_path, e
                ))
            })?,
        TokenizerType::SentencePiece => {
            return Err(AdapterError::InvalidInput(
                "SentencePiece tokenizer not yet implemented".to_string(),
            ));
        }
    };

    // Encode from a borrow: the owned `text` is moved into the result below, so cloning it
    // just to feed the tokenizer would be a wasted allocation.
    let encoding = tokenizer
        .encode(text.as_str(), false)
        .map_err(|e| AdapterError::InvalidInput(format!("Tokenization failed: {}", e)))?;

    let mut ids: Vec<usize> = encoding.get_ids().iter().map(|&id| id as usize).collect();
    let mut attention_mask: Vec<usize> = encoding
        .get_attention_mask()
        .iter()
        .map(|&mask| mask as usize)
        .collect();
    let mut token_type_ids: Vec<usize> = encoding
        .get_type_ids()
        .iter()
        .map(|&type_id| type_id as usize)
        .collect();

    // `Vec::truncate` is a no-op for vectors that are already short enough.
    if let Some(max_len) = max_length {
        ids.truncate(max_len);
        attention_mask.truncate(max_len);
        token_type_ids.truncate(max_len);
    }

    Ok(PreprocessedData::TokenIds {
        ids,
        attention_mask,
        token_type_ids,
        vocab_file: tokenizer_path.to_string(),
        original_text: text,
    })
}
/// Token ID that `phonemize_step` repeats `silence_tokens` times, right after the optional
/// leading padding token.
// NOTE(review): presumably the ID of the silence/pause symbol in the Kokoro vocabulary —
// confirm against the model's tokens.txt.
const KOKORO_SILENCE_TOKEN_ID: i64 = 30;
/// Converts text into phoneme token IDs using a phonemizer backend and a character→ID table.
///
/// # Arguments
/// * `data` - Must be [`PreprocessedData::Text`]; every other variant is rejected.
/// * `tokens_path` - File parsed by `load_tokens_map`. Its parent directory is handed to the
///   backend as the base path (`"."` when the path has no directory component).
/// * `backend` - Selects the phonemizer implementation via `backend.create`.
/// * `dict_path`, `language` - Forwarded unchanged to `backend.create`.
/// * `add_padding` - When `true`, token `0` is added at both ends of the sequence.
/// * `normalize_text` - When `true`, the text goes through [`normalize_text_for_tts`] first.
/// * `silence_tokens` - Number of `KOKORO_SILENCE_TOKEN_ID` entries inserted after the
///   leading padding token.
///
/// # Returns
/// [`PreprocessedData::PhonemeIds`] with the token IDs, the phoneme string returned by the
/// backend and the original (un-normalized) text. Phoneme characters that have no entry in
/// the tokens table are skipped.
///
/// # Errors
/// Returns [`AdapterError::InvalidInput`] when `data` is not text or the tokens file cannot be
/// read, and propagates any error reported by the backend.
pub fn phonemize_step(
    data: PreprocessedData,
    tokens_path: &str,
    backend: &PhonemizerBackend,
    dict_path: Option<&str>,
    language: Option<&str>,
    add_padding: bool,
    normalize_text: bool,
    silence_tokens: u8,
) -> ExecutorResult<PreprocessedData> {
    use crate::phonemizer::load_tokens_map;

    let PreprocessedData::Text(text) = data else {
        return Err(AdapterError::InvalidInput(
            "Phonemize requires text input".to_string(),
        ));
    };

    let tokens_content = std::fs::read_to_string(tokens_path).map_err(|e| {
        AdapterError::InvalidInput(format!("Failed to read tokens file {}: {}", tokens_path, e))
    })?;
    let tokens_map = load_tokens_map(&tokens_content);

    // Borrow the input when no normalization is requested instead of cloning it.
    let normalized;
    let processed_text: &str = if normalize_text {
        normalized = normalize_text_for_tts(&text);
        &normalized
    } else {
        &text
    };

    // `Path::parent` yields `Some("")` for a bare file name such as "tokens.txt"; treat that
    // like "no parent" so the backend always receives a usable directory.
    let base_path = std::path::Path::new(tokens_path)
        .parent()
        .filter(|dir| !dir.as_os_str().is_empty())
        .and_then(|dir| dir.to_str())
        .unwrap_or(".");

    let backend_impl = backend.create(base_path, dict_path, language);
    let phonemes = backend_impl.phonemize(processed_text, &tokens_map)?;

    let mut ids: Vec<i64> =
        Vec::with_capacity(phonemes.len() + usize::from(silence_tokens) + 2);
    if add_padding {
        ids.push(0);
    }
    ids.extend(std::iter::repeat_n(
        KOKORO_SILENCE_TOKEN_ID,
        usize::from(silence_tokens),
    ));
    // One lookup per character is enough: a space that is missing from the table cannot be
    // found by looking it up a second time, so unmapped characters are simply dropped.
    ids.extend(
        phonemes
            .chars()
            .filter_map(|c| tokens_map.get(&c).copied()),
    );
    if add_padding {
        ids.push(0);
    }

    Ok(PreprocessedData::PhonemeIds {
        ids,
        phonemes,
        original_text: text,
    })
}
/// Normalizes free-form text before it is handed to a phonemizer.
///
/// The steps run in this order:
/// 1. `[text](/phonemes/)` / `[text](feature)` links are resolved by `parse_phoneme_links`.
/// 2. Full-width / CJK punctuation and typographic quotes are mapped to ASCII.
/// 3. `Dr.`, `Mr.`, `Mrs.`, `Ms.` and `etc.` are spelled out (only when they start a word, so
///    text such as `ATMs.` is left alone).
/// 4. Currency, percentages and plain integers are expanded.
/// 5. `...` becomes the single ellipsis character `…`.
/// 6. A space before `.`, `?`, `!`, `,` or `;` is removed.
/// 7. A space is inserted after `.`, `!`, `?` or `…` when a letter or digit follows directly,
///    except for a decimal point between two digits (`3.5` stays `3.5`).
/// 8. Whitespace runs collapse to one space and the result is trimmed.
///
/// # Returns
/// The normalized text as a new `String`.
pub fn normalize_text_for_tts(text: &str) -> String {
    /// Replaces `abbr` with `expansion`, skipping matches that directly follow a letter or
    /// digit (those are the tail of a longer word, not the abbreviation itself).
    fn expand_abbreviation(text: &str, abbr: &str, expansion: &str) -> String {
        let mut out = String::with_capacity(text.len());
        let mut rest = text;
        // Last character of the *input* that precedes the current search position.
        let mut prev: Option<char> = None;
        while let Some(pos) = rest.find(abbr) {
            let before = &rest[..pos];
            out.push_str(before);
            if let Some(c) = before.chars().next_back() {
                prev = Some(c);
            }
            if prev.is_some_and(char::is_alphanumeric) {
                out.push_str(abbr);
            } else {
                out.push_str(expansion);
            }
            prev = abbr.chars().next_back();
            rest = &rest[pos + abbr.len()..];
        }
        out.push_str(rest);
        out
    }

    let mut result = parse_phoneme_links(text);

    // Full-width / CJK punctuation → ASCII punctuation followed by a space.
    for (from, to) in [
        ('\u{3001}', ", "),
        ('\u{3002}', ". "),
        ('\u{FF01}', "! "),
        ('\u{FF0C}', ", "),
        ('\u{FF1A}', ": "),
        ('\u{FF1B}', "; "),
        ('\u{FF1F}', "? "),
    ] {
        result = result.replace(from, to);
    }

    // Typographic quotes → ASCII quotes.
    result = result.replace(['\u{2018}', '\u{2019}'], "'");
    result = result.replace(['\u{201C}', '\u{201D}'], "\"");

    for (abbr, expansion) in [
        ("Dr.", "Doctor"),
        ("Mr.", "Mister"),
        ("Mrs.", "Missus"),
        ("Ms.", "Miss"),
        ("etc.", "etcetera"),
    ] {
        result = expand_abbreviation(&result, abbr, expansion);
    }

    result = expand_currency(&result);
    result = expand_percentage(&result);
    result = expand_numbers(&result);

    result = result.replace("...", "\u{2026}");

    // Pull punctuation back onto the preceding word ("you ?" → "you?").
    for (from, to) in [(" .", "."), (" ?", "?"), (" !", "!"), (" ,", ","), (" ;", ";")] {
        result = result.replace(from, to);
    }

    // Separate sentences that were glued together ("Hello.World" → "Hello. World").
    let mut spaced = String::with_capacity(result.len() + 16);
    let mut prev: Option<char> = None;
    let mut chars = result.chars().peekable();
    while let Some(c) = chars.next() {
        spaced.push(c);
        if let Some(&next) = chars.peek() {
            let ends_sentence = matches!(c, '.' | '!' | '?' | '\u{2026}');
            // A dot between two digits is a decimal point, not a sentence end.
            let decimal_point =
                c == '.' && next.is_ascii_digit() && prev.is_some_and(|p| p.is_ascii_digit());
            if ends_sentence && next.is_alphanumeric() && !decimal_point {
                spaced.push(' ');
            }
        }
        prev = Some(c);
    }

    // Collapse every whitespace run into a single ASCII space.
    let mut collapsed = String::with_capacity(spaced.len());
    let mut prev_space = false;
    for c in spaced.chars() {
        if c.is_whitespace() {
            if !prev_space {
                collapsed.push(' ');
            }
            prev_space = true;
        } else {
            collapsed.push(c);
            prev_space = false;
        }
    }

    collapsed.trim().to_string()
}
/// Marker character that `parse_phoneme_links` writes in front of an inline phoneme override.
pub(crate) const PHONEME_LINK_START: char = '\x01';
/// Marker character that `parse_phoneme_links` writes behind an inline phoneme override.
pub(crate) const PHONEME_LINK_END: char = '\x02';

/// Resolves markdown-style links in `text`.
///
/// * `[word](/phonemes/)` is replaced by `PHONEME_LINK_START`, the phonemes, then
///   `PHONEME_LINK_END`; the bracketed word itself is dropped.
/// * `[word](anything)` is replaced by `word`; the parenthesized part is dropped.
/// * A bracket pair without a following parenthesized part, and an unclosed `[`, are copied
///   through unchanged.
///
/// # Returns
/// The rewritten text as a new `String`.
fn parse_phoneme_links(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    let mut remaining = text;

    while let Some(bracket_start) = remaining.find('[') {
        result.push_str(&remaining[..bracket_start]);
        let after_bracket = &remaining[bracket_start + 1..];

        let Some(bracket_end) = after_bracket.find(']') else {
            // Unclosed bracket: keep it literally and continue scanning behind it.
            result.push('[');
            remaining = after_bracket;
            continue;
        };
        let link_text = &after_bracket[..bracket_end];
        let after_close = &after_bracket[bracket_end + 1..];

        // Phoneme override. The closing "/)" is searched *behind* the opening "(/": searching
        // from the start would match the overlapping "/)" of "(/)" and slice out of order.
        if let Some(body) = after_close.strip_prefix("(/") {
            if let Some(end) = body.find("/)") {
                result.push(PHONEME_LINK_START);
                result.push_str(&body[..end]);
                result.push(PHONEME_LINK_END);
                remaining = &body[end + 2..];
                continue;
            }
        }

        // Any other parenthesized suffix: keep the link text, drop the suffix.
        if let Some(body) = after_close.strip_prefix('(') {
            if let Some(end) = body.find(')') {
                result.push_str(link_text);
                remaining = &body[end + 1..];
                continue;
            }
        }

        // Plain bracketed text with no link target.
        result.push('[');
        result.push_str(link_text);
        result.push(']');
        remaining = after_close;
    }

    result.push_str(remaining);
    result
}
/// Rewrites dollar amounts (`$` followed by digits) into words-with-digits form.
///
/// * `$5` → `5 dollars`, `$1` → `1 dollar`
/// * `$3.50` → `3 dollars and 50 cents`, `$3.5` → `3 dollars and 50 cents`
/// * `$3.00` → `3 dollars`
///
/// A `.` only counts as a decimal point when a digit follows it, so the period that ends a
/// sentence ("It costs $5.") is left in place. The digits themselves are not spelled out here.
/// A `$` that is not followed by a digit is copied through unchanged.
fn expand_currency(text: &str) -> String {
    /// True when the digit string denotes the value one (ignoring leading zeros).
    fn is_one(digits: &str) -> bool {
        digits.trim_start_matches('0') == "1"
    }

    let chars: Vec<char> = text.chars().collect();
    let mut result = String::with_capacity(text.len() + 16);
    let mut i = 0;

    while i < chars.len() {
        let starts_amount =
            chars[i] == '$' && chars.get(i + 1).is_some_and(|c| c.is_ascii_digit());
        if !starts_amount {
            result.push(chars[i]);
            i += 1;
            continue;
        }

        // Whole-dollar digits.
        let mut end = i + 1;
        let mut dollars = String::new();
        while end < chars.len() && chars[end].is_ascii_digit() {
            dollars.push(chars[end]);
            end += 1;
        }

        // Optional fraction: consume the dot only if at least one digit follows it.
        let mut cents = String::new();
        if chars.get(end) == Some(&'.') && chars.get(end + 1).is_some_and(|c| c.is_ascii_digit())
        {
            end += 1;
            while end < chars.len() && chars[end].is_ascii_digit() {
                cents.push(chars[end]);
                end += 1;
            }
        }
        // "$3.5" means fifty cents, not five.
        if cents.len() == 1 {
            cents.push('0');
        }

        result.push_str(&dollars);
        result.push_str(if is_one(&dollars) { " dollar" } else { " dollars" });

        if cents.chars().any(|c| c != '0') {
            result.push_str(" and ");
            result.push_str(&cents);
            result.push_str(if is_one(&cents) { " cent" } else { " cents" });
        }

        i = end;
    }

    result
}
/// Rewrites `<number>%` into `<number> percent`, word by word.
///
/// Words are split on single spaces, so the spacing of the input is preserved. Punctuation
/// that trails the percent sign (`50%.`, `3.5%,`) is kept behind the expansion. The part in
/// front of `%` must consist of digits and dots and contain at least one digit; anything else
/// is copied through unchanged.
fn expand_percentage(text: &str) -> String {
    let mut result = String::with_capacity(text.len() + 16);

    for (i, word) in text.split(' ').enumerate() {
        if i > 0 {
            result.push(' ');
        }

        // Peel off punctuation that follows the percent sign so "50%." is still recognized.
        let core = word.trim_end_matches(|c: char| c.is_ascii_punctuation() && c != '%');
        let trailing = &word[core.len()..];

        let number = core.strip_suffix('%').filter(|num| {
            num.chars().any(|c| c.is_ascii_digit())
                && num.chars().all(|c| c.is_ascii_digit() || c == '.')
        });

        match number {
            Some(num) => {
                result.push_str(num);
                result.push_str(" percent");
                result.push_str(trailing);
            }
            None => result.push_str(word),
        }
    }

    result
}
/// Spells out standalone non-negative integers, word by word.
///
/// Words are split on single spaces, so the spacing of the input is preserved. A word is
/// expanded when it is a run of ASCII digits optionally followed by punctuation (`82`, `82.`,
/// `7,`); the punctuation is kept behind the spelled-out number. Words whose trailing part
/// contains letters or digits (`1st`, `5kg`), decimals (`3.5`) and numbers that do not fit in
/// a `u64` are copied through unchanged.
fn expand_numbers(text: &str) -> String {
    let mut result = String::with_capacity(text.len() + 16);

    for (i, word) in text.split(' ').enumerate() {
        if i > 0 {
            result.push(' ');
        }

        let digits = word.trim_end_matches(|c: char| !c.is_ascii_digit());
        let suffix = &word[digits.len()..];

        let is_plain_integer =
            !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit());
        // Only punctuation may follow; "1st" must not turn into "onest".
        let suffix_is_punctuation = !suffix.chars().any(char::is_alphanumeric);

        let value = if is_plain_integer && suffix_is_punctuation {
            digits.parse::<u64>().ok()
        } else {
            None
        };

        match value {
            Some(n) => {
                result.push_str(&number_to_words(n));
                result.push_str(suffix);
            }
            None => result.push_str(word),
        }
    }

    result
}
/// Spells out `n` in English words separated by single spaces, without hyphens or "and"
/// (`82` → `eighty two`, `1984` → `one thousand nine hundred eighty four`). `0` is `zero`.
fn number_to_words(n: u64) -> String {
    const ONES: [&str; 20] = [
        "",
        "one",
        "two",
        "three",
        "four",
        "five",
        "six",
        "seven",
        "eight",
        "nine",
        "ten",
        "eleven",
        "twelve",
        "thirteen",
        "fourteen",
        "fifteen",
        "sixteen",
        "seventeen",
        "eighteen",
        "nineteen",
    ];
    const TENS: [&str; 10] = [
        "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
    ];
    // Largest scale first so the first match is the leading group.
    const SCALES: [(u64, &str); 4] = [
        (1_000_000_000_000, "trillion"),
        (1_000_000_000, "billion"),
        (1_000_000, "million"),
        (1_000, "thousand"),
    ];

    /// Appends `tail` to `head` with a space, or returns `head` alone when `tail` is empty.
    fn with_remainder(head: String, tail: String) -> String {
        if tail.is_empty() {
            head
        } else {
            format!("{} {}", head, tail)
        }
    }

    /// Spells a value; zero yields the empty string so it vanishes from compound numbers.
    fn spell(value: u64) -> String {
        match value {
            0 => String::new(),
            1..=19 => ONES[value as usize].to_string(),
            20..=99 => with_remainder(
                TENS[(value / 10) as usize].to_string(),
                spell(value % 10),
            ),
            100..=999 => with_remainder(
                format!("{} hundred", ONES[(value / 100) as usize]),
                spell(value % 100),
            ),
            _ => match SCALES.iter().find(|&&(scale, _)| value >= scale) {
                Some(&(scale, name)) => with_remainder(
                    format!("{} {}", spell(value / scale), name),
                    spell(value % scale),
                ),
                None => String::new(),
            },
        }
    }

    if n == 0 {
        "zero".to_string()
    } else {
        spell(n)
    }
}
#[cfg(test)]
mod tests {
// Unit tests for text normalization and the phonemize pipeline.
//
// Tests that need model fixtures (tokens.txt, misaki/us_gold.json) print a "Skipping" message
// and return early when the fixture files are not found, so they pass without the fixtures.
use super::*;
use std::collections::HashMap;
use crate::execution::preprocessing::backends::MisakiBackend;
use crate::execution::preprocessing::backends::PhonemizerBackend as PhonemizerBackendTrait;
use crate::execution::template::PhonemizerBackend;
// ---- normalize_text_for_tts: punctuation handling (no fixtures needed) ----
#[test]
fn test_normalize_ellipsis_to_unicode() {
let result = normalize_text_for_tts("You look...Lost");
assert!(
result.contains('\u{2026}'),
"Expected Unicode ellipsis (…), got: {}",
result
);
assert!(!result.contains("..."), "ASCII ellipsis should be replaced");
}
#[test]
fn test_normalize_spaced_question_mark() {
assert_eq!(normalize_text_for_tts("you ?"), "you?");
}
#[test]
fn test_normalize_spaced_period() {
assert_eq!(normalize_text_for_tts("hello ."), "hello.");
}
#[test]
fn test_normalize_spaced_exclamation() {
assert_eq!(normalize_text_for_tts("wow !"), "wow!");
}
#[test]
fn test_normalize_spaced_comma() {
assert_eq!(normalize_text_for_tts("Beer , Ale"), "Beer, Ale");
}
#[test]
fn test_normalize_space_after_punct_before_word() {
let result = normalize_text_for_tts("look...Lost");
assert_eq!(result, "look\u{2026} Lost");
}
#[test]
fn test_normalize_period_joined_words() {
let result = normalize_text_for_tts("Hello.World");
assert_eq!(result, "Hello. World");
}
#[test]
fn test_normalize_preserves_normal_punctuation() {
// Already well-formed text must come back unchanged.
let result = normalize_text_for_tts("Hello. How are you?");
assert_eq!(result, "Hello. How are you?");
}
#[test]
fn test_normalize_full_example() {
let input = "Hello there. You look...Lost. Can I help you ? we have Beer, Ale and an extensive menu.";
let result = normalize_text_for_tts(input);
assert_eq!(
result,
"Hello there. You look\u{2026} Lost. Can I help you? we have Beer, Ale and an extensive menu."
);
}
#[test]
fn test_normalize_abbreviation_expansion() {
let result = normalize_text_for_tts("Dr. Smith and Mr. Jones");
assert_eq!(result, "Doctor Smith and Mister Jones");
}
#[test]
fn test_cjk_punctuation_normalization() {
// Each full-width / CJK mark maps to its ASCII counterpart followed by one space.
assert_eq!(normalize_text_for_tts("hello\u{3001}world"), "hello, world");
assert_eq!(normalize_text_for_tts("hello\u{3002}world"), "hello. world");
assert_eq!(normalize_text_for_tts("hello\u{FF01}world"), "hello! world");
assert_eq!(normalize_text_for_tts("hello\u{FF0C}world"), "hello, world");
assert_eq!(normalize_text_for_tts("hello\u{FF1A}world"), "hello: world");
assert_eq!(normalize_text_for_tts("hello\u{FF1B}world"), "hello; world");
assert_eq!(normalize_text_for_tts("hello\u{FF1F}world"), "hello? world");
}
// ---- Kokoro fixture helpers ----
// Locates the Kokoro fixture directory: tries a workspace-relative path, then a
// crate-relative path (each accepted only if it contains a `misaki` entry), and finally
// falls back to a path built from CARGO_MANIFEST_DIR without checking that it exists.
fn kokoro_fixture_path() -> String {
let workspace_path = "repos/xybrid/integration-tests/fixtures/models/kokoro-82m";
if std::path::Path::new(workspace_path).join("misaki").exists() {
return workspace_path.to_string();
}
let crate_path = "../../integration-tests/fixtures/models/kokoro-82m";
if std::path::Path::new(crate_path).join("misaki").exists() {
return crate_path.to_string();
}
env!("CARGO_MANIFEST_DIR").to_string()
+ "/../../integration-tests/fixtures/models/kokoro-82m"
}
// Loads the character→token-ID table from the fixture's tokens.txt.
// Panics when the file cannot be read.
fn kokoro_vocab() -> HashMap<char, i64> {
use crate::phonemizer::load_tokens_map;
let base = kokoro_fixture_path();
let tokens_path = std::path::Path::new(&base).join("tokens.txt");
let tokens_content = std::fs::read_to_string(&tokens_path)
.unwrap_or_else(|e| panic!("Failed to read tokens.txt at {:?}: {}", tokens_path, e));
load_tokens_map(&tokens_content)
}
// True when misaki/us_gold.json exists under the fixture directory.
// NOTE(review): unlike has_kittentts_fixtures this does not check tokens.txt, so a partial
// fixture makes kokoro_vocab panic instead of skipping the test.
fn has_kokoro_fixtures() -> bool {
let base = kokoro_fixture_path();
std::path::Path::new(&base)
.join("misaki")
.join("us_gold.json")
.exists()
}
// ---- MisakiBackend: punctuation must survive phonemization (needs Kokoro fixtures) ----
#[test]
fn test_misaki_preserves_period() {
if !has_kokoro_fixtures() {
eprintln!("Skipping: Kokoro fixtures not found");
return;
}
let vocab = kokoro_vocab();
let base = kokoro_fixture_path();
let backend = MisakiBackend::new(base);
let result = backend.phonemize("Lost. Can", &vocab).unwrap();
assert!(
result.contains('.'),
"Period should be preserved in phoneme output, got: {}",
result
);
}
#[test]
fn test_misaki_preserves_comma() {
if !has_kokoro_fixtures() {
eprintln!("Skipping: Kokoro fixtures not found");
return;
}
let vocab = kokoro_vocab();
let base = kokoro_fixture_path();
let backend = MisakiBackend::new(base);
let result = backend.phonemize("Beer, Ale", &vocab).unwrap();
assert!(
result.contains(','),
"Comma should be preserved in phoneme output, got: {}",
result
);
}
#[test]
fn test_misaki_preserves_question_mark() {
if !has_kokoro_fixtures() {
eprintln!("Skipping: Kokoro fixtures not found");
return;
}
let vocab = kokoro_vocab();
let base = kokoro_fixture_path();
let backend = MisakiBackend::new(base);
let result = backend.phonemize("help you?", &vocab).unwrap();
assert!(
result.contains('?'),
"Question mark should be preserved in phoneme output, got: {}",
result
);
}
#[test]
fn test_misaki_preserves_ellipsis() {
if !has_kokoro_fixtures() {
eprintln!("Skipping: Kokoro fixtures not found");
return;
}
let vocab = kokoro_vocab();
let base = kokoro_fixture_path();
let backend = MisakiBackend::new(base);
let result = backend.phonemize("look\u{2026} Lost", &vocab).unwrap();
assert!(
result.contains('\u{2026}'),
"Unicode ellipsis should be preserved in phoneme output, got: {}",
result
);
}
#[test]
fn test_misaki_standalone_punctuation() {
if !has_kokoro_fixtures() {
eprintln!("Skipping: Kokoro fixtures not found");
return;
}
let vocab = kokoro_vocab();
let base = kokoro_fixture_path();
let backend = MisakiBackend::new(base);
let result = backend.phonemize("yes ?", &vocab).unwrap();
assert!(
result.contains('?'),
"Standalone question mark should be preserved, got: {}",
result
);
}
// ---- Full phonemize_step pipeline (needs Kokoro fixtures) ----
// Runs phonemize_step with the Misaki backend, padding and normalization enabled and no
// silence tokens. Returns None when the fixtures are missing; panics if the step fails.
// The asserted IDs below (3, 4, 6, 10) are the comma, period, question mark and ellipsis
// tokens according to the assertion messages.
fn run_phonemize_pipeline(text: &str) -> Option<Vec<i64>> {
if !has_kokoro_fixtures() {
return None;
}
let base = kokoro_fixture_path();
let tokens_path = std::path::Path::new(&base)
.join("tokens.txt")
.to_string_lossy()
.to_string();
let result = phonemize_step(
PreprocessedData::Text(text.to_string()),
&tokens_path,
&PhonemizerBackend::MisakiDictionary,
None,
None,
true, true, 0, )
.unwrap();
match result {
PreprocessedData::PhonemeIds { ids, .. } => Some(ids),
_ => panic!("Expected PhonemeIds"),
}
}
#[test]
fn test_pipeline_period_token_present() {
let Some(ids) = run_phonemize_pipeline("Hello there.") else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
assert!(
ids.contains(&4),
"Token IDs should contain period (4), got: {:?}",
ids
);
}
#[test]
fn test_pipeline_question_mark_token_present() {
let Some(ids) = run_phonemize_pipeline("Can I help you?") else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
assert!(
ids.contains(&6),
"Token IDs should contain question mark (6), got: {:?}",
ids
);
}
#[test]
fn test_pipeline_comma_token_present() {
let Some(ids) = run_phonemize_pipeline("Beer, Ale and menu.") else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
assert!(
ids.contains(&3),
"Token IDs should contain comma (3), got: {:?}",
ids
);
assert!(
ids.contains(&4),
"Token IDs should contain period (4), got: {:?}",
ids
);
}
#[test]
fn test_pipeline_ellipsis_token_present() {
let Some(ids) = run_phonemize_pipeline("You look...Lost.") else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
assert!(
ids.contains(&10),
"Token IDs should contain ellipsis (10), got: {:?}",
ids
);
assert!(
ids.contains(&4),
"Token IDs should contain period (4), got: {:?}",
ids
);
}
#[test]
fn test_pipeline_spaced_question_mark_fixed() {
let Some(ids) = run_phonemize_pipeline("Can I help you ?") else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
assert!(
ids.contains(&6),
"Spaced '?' should still produce question mark token (6), got: {:?}",
ids
);
}
#[test]
fn test_pipeline_full_example_all_punctuation() {
let input = "Hello there. You look...Lost. Can I help you ? we have Beer, Ale and an extensive menu.";
let Some(ids) = run_phonemize_pipeline(input) else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
// Count each punctuation token in the produced ID sequence.
let period_count = ids.iter().filter(|&&id| id == 4).count();
let comma_count = ids.iter().filter(|&&id| id == 3).count();
let question_count = ids.iter().filter(|&&id| id == 6).count();
let ellipsis_count = ids.iter().filter(|&&id| id == 10).count();
assert!(
period_count >= 3,
"Expected at least 3 periods (after 'there', 'Lost', 'menu'), got {}. IDs: {:?}",
period_count,
ids
);
assert!(
comma_count >= 1,
"Expected at least 1 comma (after 'Beer'), got {}. IDs: {:?}",
comma_count,
ids
);
assert!(
question_count >= 1,
"Expected at least 1 question mark (after 'you'), got {}. IDs: {:?}",
question_count,
ids
);
assert!(
ellipsis_count >= 1,
"Expected at least 1 ellipsis (after 'look'), got {}. IDs: {:?}",
ellipsis_count,
ids
);
assert_eq!(ids[0], 0, "Should start with padding token");
assert_eq!(ids[ids.len() - 1], 0, "Should end with padding token");
}
#[test]
fn test_pipeline_no_punctuation_regression() {
let Some(ids) = run_phonemize_pipeline("hello world") else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
assert!(ids.len() > 2, "Should produce phoneme tokens");
assert_eq!(ids[0], 0, "Should start with padding");
assert_eq!(ids[ids.len() - 1], 0, "Should end with padding");
}
// ---- normalize_text_for_tts: number expansion (no fixtures needed) ----
#[test]
fn test_normalize_expands_number_82() {
let result = normalize_text_for_tts("I have 82 items");
assert!(
result.contains("eighty two"),
"Expected '82' to be expanded to 'eighty two', got: {}",
result
);
assert!(
!result.contains("82"),
"Original '82' should be replaced, got: {}",
result
);
}
#[test]
fn test_normalize_expands_various_numbers() {
assert_eq!(normalize_text_for_tts("0"), "zero");
assert!(normalize_text_for_tts("15 cats").contains("fifteen"));
assert!(normalize_text_for_tts("100 percent").contains("one hundred"));
}
// ---- phonemize_step with a selectable backend (needs Kokoro fixtures) ----
// Runs phonemize_step with the given backend and normalization flag (padding on, no silence
// tokens) and returns the phoneme string together with the token IDs.
// Returns None when the fixtures are missing; panics if the step fails.
fn run_phonemize_step_with_backend(
text: &str,
backend: &PhonemizerBackend,
normalize_text: bool,
) -> Option<(String, Vec<i64>)> {
if !has_kokoro_fixtures() {
return None;
}
let base = kokoro_fixture_path();
let tokens_path = std::path::Path::new(&base)
.join("tokens.txt")
.to_string_lossy()
.to_string();
// NOTE(review): both arms yield None, so no dictionary path is passed for any backend.
let dict_path = match backend {
PhonemizerBackend::CmuDictionary => None,
_ => None,
};
let result = phonemize_step(
PreprocessedData::Text(text.to_string()),
&tokens_path,
backend,
dict_path,
None,
true,
normalize_text,
0, )
.unwrap();
match result {
PreprocessedData::PhonemeIds { ids, phonemes, .. } => Some((phonemes, ids)),
_ => panic!("Expected PhonemeIds"),
}
}
#[test]
fn test_phonemize_step_cmu_normalize_true_expands_numbers() {
let Some((phonemes, ids)) =
run_phonemize_step_with_backend("82", &PhonemizerBackend::CmuDictionary, true)
else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
assert!(
!phonemes.is_empty(),
"CmuDictionary with normalize_text=true should produce phonemes for '82' (expanded to 'eighty two'), got empty"
);
// More than the two padding tokens means real phoneme tokens were produced.
assert!(
ids.len() > 2,
"CmuDictionary with normalize_text=true should produce token IDs for '82', got: {:?}",
ids
);
}
#[test]
fn test_phonemize_step_misaki_normalize_true_expands_numbers() {
let Some((phonemes, ids)) =
run_phonemize_step_with_backend("82", &PhonemizerBackend::MisakiDictionary, true)
else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
assert!(
!phonemes.is_empty(),
"MisakiDictionary with normalize_text=true should produce phonemes for '82' (expanded to 'eighty two'), got empty"
);
assert!(
ids.len() > 2,
"MisakiDictionary with normalize_text=true should produce token IDs for '82', got: {:?}",
ids
);
}
#[test]
fn test_phonemize_step_normalize_false_does_not_expand_numbers() {
let Some((phonemes_no_norm, ids_no_norm)) =
run_phonemize_step_with_backend("82", &PhonemizerBackend::MisakiDictionary, false)
else {
eprintln!("Skipping: Kokoro fixtures not found");
return;
};
// The fixtures were found for the call above, so this second call returns silently
// only if they disappear in between.
let Some((phonemes_norm, ids_norm)) =
run_phonemize_step_with_backend("82", &PhonemizerBackend::MisakiDictionary, true)
else {
return;
};
assert_ne!(
phonemes_no_norm, phonemes_norm,
"normalize_text=false should produce different phonemes than normalize_text=true for '82'. \
no_norm='{}', norm='{}'",
phonemes_no_norm, phonemes_norm
);
assert!(
ids_norm.len() > ids_no_norm.len(),
"Normalized '82' (→ 'eighty two') should produce more tokens than raw '82'. \
norm_ids={:?}, no_norm_ids={:?}",
ids_norm,
ids_no_norm
);
}
// ---- normalize_text_for_tts: currency, percentage, abbreviations, quotes ----
#[test]
fn test_normalize_currency_dollar() {
let result = normalize_text_for_tts("$3.50");
assert!(
!result.contains('$'),
"Dollar sign should be removed after normalization, got: {}",
result
);
assert!(
result.contains("three") || result.contains("3"),
"Currency amount should be expanded, got: {}",
result
);
}
#[test]
fn test_normalize_percentage() {
let result = normalize_text_for_tts("100%");
assert!(
!result.contains('%'),
"Percent sign should be removed after normalization, got: {}",
result
);
assert!(
result.to_lowercase().contains("percent"),
"Percentage should be expanded to 'percent', got: {}",
result
);
}
#[test]
fn test_normalize_usa_abbreviation() {
// Only checks that the output is not empty; the exact form is not pinned down.
let result = normalize_text_for_tts("U.S.A.");
assert!(
!result.is_empty(),
"U.S.A. normalization should not produce empty string"
);
}
#[test]
fn test_normalize_dr_expansion() {
let result = normalize_text_for_tts("Dr.");
assert!(
result.contains("Doctor") || result.contains("doctor"),
"Dr. should expand to Doctor/doctor, got: {}",
result
);
}
#[test]
fn test_normalize_smart_quotes() {
let input = "\u{201C}Hello\u{201D}";
let result = normalize_text_for_tts(input);
assert!(
result.contains('"'),
"Smart quotes should normalize to ASCII quotes, got: {}",
result
);
assert!(
!result.contains('\u{201C}') && !result.contains('\u{201D}'),
"Smart quotes should be replaced, got: {}",
result
);
}
// ---- KittenTTS fixture helpers ----
// Same lookup strategy as kokoro_fixture_path, for the kitten-tts-nano-0.2 fixture directory.
fn kittentts_fixture_path() -> String {
let workspace_path = "repos/xybrid/integration-tests/fixtures/models/kitten-tts-nano-0.2";
if std::path::Path::new(workspace_path).join("misaki").exists() {
return workspace_path.to_string();
}
let crate_path = "../../integration-tests/fixtures/models/kitten-tts-nano-0.2";
if std::path::Path::new(crate_path).join("misaki").exists() {
return crate_path.to_string();
}
env!("CARGO_MANIFEST_DIR").to_string()
+ "/../../integration-tests/fixtures/models/kitten-tts-nano-0.2"
}
// True when both tokens.txt and misaki/us_gold.json exist under the fixture directory.
fn has_kittentts_fixtures() -> bool {
let base = kittentts_fixture_path();
let p = std::path::Path::new(&base);
p.join("tokens.txt").exists() && p.join("misaki").join("us_gold.json").exists()
}
// Loads the character→token-ID table from the KittenTTS tokens.txt.
// Panics when the file cannot be read.
fn kittentts_vocab() -> HashMap<char, i64> {
use crate::phonemizer::load_tokens_map;
let base = kittentts_fixture_path();
let tokens_path = std::path::Path::new(&base).join("tokens.txt");
let tokens_content = std::fs::read_to_string(&tokens_path)
.unwrap_or_else(|e| panic!("Failed to read tokens.txt at {:?}: {}", tokens_path, e));
load_tokens_map(&tokens_content)
}
// Checks that every phoneme character the Misaki backend emits for a set of sample phrases
// (other than the space) has an entry in the KittenTTS tokens table.
// Marked #[ignore], so it only runs when explicitly requested.
#[test]
#[ignore]
fn test_kittentts_misaki_token_mapping_validation() {
if !has_kittentts_fixtures() {
eprintln!("Skipping: KittenTTS fixtures not found");
return;
}
let vocab = kittentts_vocab();
let base = kittentts_fixture_path();
let backend = MisakiBackend::new(base);
let test_phrases = [
"Hello world",
"The year was 1984",
"Dr. Smith has 3 cats",
"Good morning everyone",
"This costs five dollars",
];
// Collect failures for all phrases first so one run reports every unmapped character.
let mut all_unmapped: Vec<(String, Vec<char>)> = Vec::new();
for phrase in &test_phrases {
let normalized = normalize_text_for_tts(phrase);
let phonemes = backend
.phonemize(&normalized, &vocab)
.unwrap_or_else(|e| panic!("Phonemization failed for '{}': {}", phrase, e));
assert!(
!phonemes.is_empty(),
"Phonemization of '{}' (normalized: '{}') produced empty output",
phrase,
normalized
);
let unmapped: Vec<char> = phonemes
.chars()
.filter(|c| !vocab.contains_key(c) && *c != ' ')
.collect();
if !unmapped.is_empty() {
all_unmapped.push((phrase.to_string(), unmapped));
}
}
if !all_unmapped.is_empty() {
let details: Vec<String> = all_unmapped
.iter()
.map(|(phrase, chars)| {
let char_details: Vec<String> = chars
.iter()
.map(|c| format!("'{}' (U+{:04X})", c, *c as u32))
.collect();
format!(
" '{}': unmapped chars: [{}]",
phrase,
char_details.join(", ")
)
})
.collect();
panic!(
"Token mapping validation failed — phoneme characters not in tokens.txt:\n{}",
details.join("\n")
);
}
}
}