use crate::tts::vocab::VOCAB;
/// Converts a phoneme string into a sequence of vocabulary token IDs.
///
/// The input is framed with a `$` character on each side before lookup, so
/// the boundary symbol is tokenized like any other character. Every character
/// is looked up in [`VOCAB`]; characters that have no entry are dropped
/// silently instead of producing an error.
///
/// Returns the IDs of the characters that were found, in input order.
pub fn tokenize(phonemes: &str) -> Vec<i64> {
    // Boundary symbol placed before and after the phoneme sequence.
    const BOUNDARY: char = '$';

    let framed = std::iter::once(BOUNDARY)
        .chain(phonemes.chars())
        .chain(std::iter::once(BOUNDARY));

    let mut ids = Vec::new();
    for symbol in framed {
        // Symbols missing from the vocabulary are skipped on purpose.
        if let Some(&index) = VOCAB.get(&symbol) {
            ids.push(index as i64);
        }
    }
    ids
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks boundary framing on a regular phoneme string and on empty input.
    #[test]
    fn test_tokenize() {
        let phonemes = "heɪ ðɪs ɪz ˈlʌvliː!";
        let ids = tokenize(phonemes);

        // The sequence must start and end with token 0 and carry at least
        // one token between the two boundary tokens.
        assert_eq!(ids.first(), Some(&0));
        assert_eq!(ids.last(), Some(&0));
        assert!(ids.len() > 2);

        // Empty input yields exactly the two boundary tokens.
        let ids_for_empty = tokenize("");
        assert_eq!(ids_for_empty.len(), 2);
    }
}
use crate::tts::vocab::REVERSE_VOCAB;
/// Converts a sequence of token IDs back into a phoneme string.
///
/// Each ID is looked up in [`REVERSE_VOCAB`] and the matching symbols are
/// concatenated in input order. IDs without an entry are skipped silently
/// instead of producing an error. Nothing is added to or stripped from the
/// sequence, so any boundary tokens present in `tokens` appear in the output.
///
/// Negative IDs, and IDs that do not fit in `usize` on the target platform,
/// are treated as unknown and skipped.
pub fn tokens_to_phonemes(tokens: &[i64]) -> String {
    tokens
        .iter()
        .filter_map(|&token| {
            // A plain `as usize` cast wraps negative values and truncates
            // large ones where `usize` is narrower than 64 bits, which could
            // alias a valid vocabulary index. The checked conversion rejects
            // such IDs instead. The fully qualified path keeps this working
            // in editions where `TryFrom` is not in the prelude.
            let index = <usize as std::convert::TryFrom<i64>>::try_from(token).ok()?;
            REVERSE_VOCAB.get(&index)
        })
        .collect()
}
#[cfg(test)]
mod tests2 {
    use super::*;

    /// Decodes known token sequences and checks the resulting phoneme text.
    #[test]
    fn test_tokens_to_phonemes() {
        // Plain ASCII text framed by boundary tokens.
        let hello_ids: [i64; 8] = [0, 24, 47, 54, 54, 57, 5, 0];
        let hello = tokens_to_phonemes(&hello_ids);
        assert_eq!(hello, "$Hello!$");

        // IPA symbols, including stress and length marks.
        let ipa_ids: [i64; 17] = [
            0, 50, 83, 54, 156, 57, 135, 3, 16, 65, 156, 87, 158, 54, 46, 5, 0,
        ];
        let ipa = tokens_to_phonemes(&ipa_ids);
        assert_eq!(ipa, "$həlˈoʊ, wˈɜːld!$");

        // No tokens decode to an empty string.
        let no_ids: [i64; 0] = [];
        assert_eq!(tokens_to_phonemes(&no_ids), "");
    }
}