/// One phoneme record, stored in `PhonemeMap::phonemes`.
///
/// `Debug` and `PartialEq` are derived so entries can be logged and compared
/// directly (for example with `assert_eq!`).
#[allow(dead_code)] // kept from the original: some fields are not read in this file
#[derive(Debug, Clone, PartialEq)]
pub struct PhonemeEntry {
    /// Symbol used as the lookup key; `phoneme_duration` compares it exactly.
    pub symbol: String,
    /// Presumably the IPA transcription of the phoneme; not read by the
    /// functions visible in this file.
    pub ipa: String,
    /// Presumably whether the phoneme is voiced; not read by the functions
    /// visible in this file.
    pub voiced: bool,
    /// Value returned by `phoneme_duration` when `symbol` matches
    /// (milliseconds, going by the field name).
    pub duration_hint_ms: f32,
}
/// A named mouth shape described by five morph-target weights.
///
/// `viseme_to_morph_weights` returns the weights in declaration order:
/// `[jaw_open, lip_spread, lip_round, tongue_up, mouth_wide]`.
///
/// `Debug` and `PartialEq` are derived so entries can be logged and compared
/// directly (for example with `assert_eq!`).
#[allow(dead_code)] // kept from the original
#[derive(Debug, Clone, PartialEq)]
pub struct VisemeEntry {
    /// Viseme name; matched exactly by `viseme_to_morph_weights`.
    pub name: String,
    /// Weight at index 0 of the morph-weight vector.
    pub jaw_open: f32,
    /// Weight at index 1 of the morph-weight vector.
    pub lip_spread: f32,
    /// Weight at index 2 of the morph-weight vector.
    pub lip_round: f32,
    /// Weight at index 3 of the morph-weight vector.
    pub tongue_up: f32,
    /// Weight at index 4 of the morph-weight vector.
    pub mouth_wide: f32,
}
/// Tuning parameters carried by a `PhonemeMap`.
///
/// `Debug`, `Clone` and `PartialEq` are derived so a configuration can be
/// logged, reused for several maps and compared in tests.
#[allow(dead_code)] // kept from the original: `blend_frames` is not read in this file
#[derive(Debug, Clone, PartialEq)]
pub struct PhonemeMapConfig {
    /// Bias applied by `coarticulation_blend`: the blend parameter `t` becomes
    /// `t + (1 - t) * coarticulation_factor` before clamping to `[0, 1]`.
    pub coarticulation_factor: f32,
    /// Value returned by `phoneme_duration` when the phoneme has no entry.
    pub default_duration_ms: f32,
    /// Not read by the functions visible in this file; presumably the number
    /// of frames a transition is spread over (confirm with the consumer).
    pub blend_frames: usize,
}
/// Phoneme-to-viseme lookup table together with its tuning configuration.
#[allow(dead_code)]
pub struct PhonemeMap {
/// Tuning values read by `phoneme_duration` and `coarticulation_blend`.
pub config: PhonemeMapConfig,
/// Per-phoneme records; `phoneme_duration` searches these by `symbol`.
pub phonemes: Vec<PhonemeEntry>,
/// Viseme shapes; `viseme_to_morph_weights` searches these by `name`.
pub visemes: Vec<VisemeEntry>,
/// `(phoneme symbol, viseme name)` pairs maintained by `add_phoneme_mapping`.
/// `phoneme_count` reports the length of this list, not of `phonemes`.
pub mapping: Vec<(String, String)>,
}
/// Returns the stock configuration: coarticulation factor `0.25`, default
/// phoneme duration `80.0` ms and `3` blend frames.
#[allow(dead_code)]
pub fn default_phoneme_map_config() -> PhonemeMapConfig {
    let coarticulation_factor = 0.25;
    let default_duration_ms = 80.0;
    let blend_frames = 3;
    PhonemeMapConfig {
        coarticulation_factor,
        default_duration_ms,
        blend_frames,
    }
}
/// Creates a map that owns `config` and has no phonemes, visemes or mappings.
#[allow(dead_code)]
pub fn new_phoneme_map(config: PhonemeMapConfig) -> PhonemeMap {
    let (phonemes, visemes, mapping) = (vec![], vec![], vec![]);
    PhonemeMap {
        config,
        phonemes,
        visemes,
        mapping,
    }
}
/// Maps `phoneme_sym` to `viseme_name`, replacing any mapping that already
/// exists for the same symbol.
///
/// The new pair is always appended, so a replaced symbol moves to the end of
/// `map.mapping`.
#[allow(dead_code)]
pub fn add_phoneme_mapping(map: &mut PhonemeMap, phoneme_sym: &str, viseme_name: &str) {
    let replacement = (phoneme_sym.to_owned(), viseme_name.to_owned());
    // Drop every earlier entry for this symbol before appending the new one.
    map.mapping
        .retain(|entry| entry.0.as_str() != phoneme_sym);
    map.mapping.push(replacement);
}
/// Looks up the viseme name mapped to `phoneme_sym`.
///
/// The comparison is an exact string match. Returns `None` when the symbol
/// has no mapping; if several entries share the symbol, the first one wins.
#[allow(dead_code)]
pub fn phoneme_to_viseme<'a>(map: &'a PhonemeMap, phoneme_sym: &str) -> Option<&'a str> {
    for (symbol, viseme) in &map.mapping {
        if symbol.as_str() == phoneme_sym {
            return Some(viseme.as_str());
        }
    }
    None
}
/// Returns the five morph weights of the viseme called `viseme_name`, in the
/// order `[jaw_open, lip_spread, lip_round, tongue_up, mouth_wide]`.
///
/// An unknown name yields five zeros. If several visemes share the name, the
/// first one in `map.visemes` is used.
#[allow(dead_code)]
pub fn viseme_to_morph_weights(map: &PhonemeMap, viseme_name: &str) -> Vec<f32> {
    map.visemes
        .iter()
        .find(|shape| shape.name == viseme_name)
        .map_or_else(
            || vec![0.0; 5],
            |shape| {
                vec![
                    shape.jaw_open,
                    shape.lip_spread,
                    shape.lip_round,
                    shape.tongue_up,
                    shape.mouth_wide,
                ]
            },
        )
}
#[allow(dead_code)]
pub fn phoneme_count(map: &PhonemeMap) -> usize {
map.mapping.len()
}
#[allow(dead_code)]
pub fn viseme_count(map: &PhonemeMap) -> usize {
map.visemes.len()
}
/// Serializes a summary of `map` as a compact JSON object.
///
/// The object has the keys `phoneme_count`, `viseme_count`,
/// `coarticulation_factor`, `default_duration_ms` and `mappings`, in that
/// order. `mappings` is an array of `[phoneme, viseme]` string pairs in the
/// order they appear in `map.mapping`.
///
/// Phoneme and viseme strings are JSON-escaped, so quotes, backslashes and
/// control characters cannot break the document. Non-finite floats (NaN,
/// ±infinity) have no JSON representation and are emitted as `null`.
#[allow(dead_code)]
pub fn phoneme_map_to_json(map: &PhonemeMap) -> String {
    /// Quotes `raw` as a JSON string literal, escaping `"`, `\` and control
    /// characters below U+0020.
    fn json_string(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len() + 2);
        out.push('"');
        for ch in raw.chars() {
            match ch {
                '"' => out.push_str("\\\""),
                '\\' => out.push_str("\\\\"),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                '\t' => out.push_str("\\t"),
                c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
                c => out.push(c),
            }
        }
        out.push('"');
        out
    }

    /// Formats `value` as a JSON number, or `null` when it is not finite.
    fn json_number(value: f32) -> String {
        if value.is_finite() {
            value.to_string()
        } else {
            "null".to_string()
        }
    }

    let mappings: Vec<String> = map
        .mapping
        .iter()
        .map(|(p, v)| format!("[{},{}]", json_string(p), json_string(v)))
        .collect();
    let parts = [
        format!("\"phoneme_count\":{}", phoneme_count(map)),
        format!("\"viseme_count\":{}", viseme_count(map)),
        format!(
            "\"coarticulation_factor\":{}",
            json_number(map.config.coarticulation_factor)
        ),
        format!(
            "\"default_duration_ms\":{}",
            json_number(map.config.default_duration_ms)
        ),
        format!("\"mappings\":[{}]", mappings.join(",")),
    ];
    format!("{{{}}}", parts.join(","))
}
#[allow(dead_code)]
pub fn build_arpabet_map() -> PhonemeMap {
let cfg = default_phoneme_map_config();
let mut map = new_phoneme_map(cfg);
add_phoneme_mapping(&mut map, "SIL", "rest");
add_phoneme_mapping(&mut map, "SP", "rest");
add_phoneme_mapping(&mut map, "AA", "aa");
add_phoneme_mapping(&mut map, "AE", "aa");
add_phoneme_mapping(&mut map, "AH", "aa");
add_phoneme_mapping(&mut map, "AO", "oh");
add_phoneme_mapping(&mut map, "AW", "oh");
add_phoneme_mapping(&mut map, "AY", "aa");
add_phoneme_mapping(&mut map, "EH", "ee");
add_phoneme_mapping(&mut map, "ER", "ee");
add_phoneme_mapping(&mut map, "EY", "ee");
add_phoneme_mapping(&mut map, "IH", "ih");
add_phoneme_mapping(&mut map, "IY", "ih");
add_phoneme_mapping(&mut map, "OW", "oh");
add_phoneme_mapping(&mut map, "OY", "oh");
add_phoneme_mapping(&mut map, "UH", "oo");
add_phoneme_mapping(&mut map, "UW", "oo");
add_phoneme_mapping(&mut map, "B", "pp");
add_phoneme_mapping(&mut map, "P", "pp");
add_phoneme_mapping(&mut map, "M", "pp");
add_phoneme_mapping(&mut map, "F", "ff");
add_phoneme_mapping(&mut map, "V", "ff");
add_phoneme_mapping(&mut map, "TH", "th");
add_phoneme_mapping(&mut map, "DH", "th");
add_phoneme_mapping(&mut map, "T", "dd");
add_phoneme_mapping(&mut map, "D", "dd");
add_phoneme_mapping(&mut map, "N", "nn");
add_phoneme_mapping(&mut map, "L", "nn");
add_phoneme_mapping(&mut map, "S", "ss");
add_phoneme_mapping(&mut map, "Z", "ss");
add_phoneme_mapping(&mut map, "SH", "sh");
add_phoneme_mapping(&mut map, "ZH", "sh");
add_phoneme_mapping(&mut map, "CH", "sh");
add_phoneme_mapping(&mut map, "JH", "sh");
add_phoneme_mapping(&mut map, "K", "kk");
add_phoneme_mapping(&mut map, "G", "kk");
add_phoneme_mapping(&mut map, "NG", "nn");
add_phoneme_mapping(&mut map, "R", "rr");
add_phoneme_mapping(&mut map, "W", "oo");
add_phoneme_mapping(&mut map, "Y", "ih");
add_phoneme_mapping(&mut map, "HH", "rest");
let viseme_defs: &[(&str, f32, f32, f32, f32, f32)] = &[
("rest", 0.0, 0.0, 0.0, 0.0, 0.0),
("aa", 0.7, 0.3, 0.0, 0.0, 0.5),
("ee", 0.4, 0.8, 0.0, 0.0, 0.8),
("ih", 0.3, 0.6, 0.0, 0.2, 0.6),
("oh", 0.5, 0.0, 0.7, 0.0, 0.3),
("oo", 0.3, 0.0, 0.9, 0.0, 0.2),
("pp", 0.0, 0.0, 0.0, 0.0, 0.0),
("ff", 0.1, 0.2, 0.0, 0.0, 0.1),
("th", 0.2, 0.3, 0.0, 0.5, 0.2),
("dd", 0.2, 0.1, 0.0, 0.4, 0.2),
("nn", 0.1, 0.0, 0.0, 0.6, 0.1),
("ss", 0.15,0.5, 0.0, 0.3, 0.3),
("sh", 0.2, 0.3, 0.3, 0.1, 0.3),
("kk", 0.3, 0.0, 0.0, 0.0, 0.3),
("rr", 0.3, 0.2, 0.2, 0.3, 0.2),
];
for (name, jaw, spread, round, tongue, wide) in viseme_defs {
map.visemes.push(VisemeEntry {
name: name.to_string(),
jaw_open: *jaw,
lip_spread: *spread,
lip_round: *round,
tongue_up: *tongue,
mouth_wide: *wide,
});
}
map
}
/// Returns the duration hint of the first `map.phonemes` entry whose symbol
/// equals `phoneme_sym`, or `map.config.default_duration_ms` when there is no
/// such entry.
#[allow(dead_code)]
pub fn phoneme_duration(map: &PhonemeMap, phoneme_sym: &str) -> f32 {
    map.phonemes
        .iter()
        .find(|entry| entry.symbol == phoneme_sym)
        .map_or(map.config.default_duration_ms, |entry| {
            entry.duration_hint_ms
        })
}
/// Interpolates the morph weights of phoneme `from` towards those of phoneme
/// `to`.
///
/// Each phoneme is resolved to its viseme; a phoneme without a mapping uses
/// the viseme named `"rest"`. The blend parameter is biased towards the
/// target before use: `t' = clamp(t + (1 - t) * coarticulation_factor, 0, 1)`.
/// With a non-zero factor, `t = 0.0` therefore already contains some of the
/// target shape, while `t = 1.0` yields exactly the target weights.
///
/// Returns five weights, `a + (b - a) * t'` per component.
#[allow(dead_code)]
pub fn coarticulation_blend(map: &PhonemeMap, from: &str, to: &str, t: f32) -> Vec<f32> {
    // Shared lookup: phoneme -> viseme name (or "rest") -> morph weights.
    let weights_for = |phoneme: &str| {
        let viseme = phoneme_to_viseme(map, phoneme).unwrap_or("rest");
        viseme_to_morph_weights(map, viseme)
    };
    let start = weights_for(from);
    let end = weights_for(to);

    let blend = (t + (1.0 - t) * map.config.coarticulation_factor).clamp(0.0, 1.0);

    start
        .into_iter()
        .zip(end)
        .map(|(a, b)| a + (b - a) * blend)
        .collect()
}
/// Returns the largest morph weight of the viseme called `viseme_name`.
///
/// The running maximum starts at `0.0`, so the result is never negative; an
/// unknown viseme (all-zero weights) yields `0.0`.
#[allow(dead_code)]
pub fn dominant_viseme(map: &PhonemeMap, viseme_name: &str) -> f32 {
    let mut peak = 0.0_f32;
    for weight in viseme_to_morph_weights(map, viseme_name) {
        peak = peak.max(weight);
    }
    peak
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh map with the default configuration and no content.
    fn empty_map() -> PhonemeMap {
        new_phoneme_map(default_phoneme_map_config())
    }

    #[test]
    fn test_default_config() {
        let PhonemeMapConfig {
            coarticulation_factor,
            default_duration_ms,
            ..
        } = default_phoneme_map_config();
        assert!((coarticulation_factor - 0.25).abs() < 1e-6);
        assert!((default_duration_ms - 80.0).abs() < 1e-6);
    }

    #[test]
    fn test_new_phoneme_map_empty() {
        let fresh = empty_map();
        assert_eq!(phoneme_count(&fresh), 0);
        assert_eq!(viseme_count(&fresh), 0);
    }

    #[test]
    fn test_add_phoneme_mapping() {
        let mut table = empty_map();
        add_phoneme_mapping(&mut table, "AA", "aa");
        assert_eq!(phoneme_count(&table), 1);
    }

    #[test]
    fn test_phoneme_to_viseme_found() {
        let mut table = empty_map();
        add_phoneme_mapping(&mut table, "AH", "aa");
        assert_eq!(phoneme_to_viseme(&table, "AH"), Some("aa"));
    }

    #[test]
    fn test_phoneme_to_viseme_not_found() {
        let table = empty_map();
        assert_eq!(phoneme_to_viseme(&table, "ZZ"), None);
    }

    #[test]
    fn test_add_mapping_replaces_existing() {
        let mut table = empty_map();
        for viseme in ["aa", "oh"] {
            add_phoneme_mapping(&mut table, "AA", viseme);
        }
        assert_eq!(phoneme_count(&table), 1);
        assert_eq!(phoneme_to_viseme(&table, "AA"), Some("oh"));
    }

    #[test]
    fn test_viseme_to_morph_weights_found() {
        let arpabet = build_arpabet_map();
        let aa = viseme_to_morph_weights(&arpabet, "aa");
        assert_eq!(aa.len(), 5);
        assert!(aa[0] > 0.0);
    }

    #[test]
    fn test_viseme_to_morph_weights_not_found() {
        let table = empty_map();
        let zeros = viseme_to_morph_weights(&table, "nonexistent");
        assert_eq!(zeros, vec![0.0; 5]);
    }

    #[test]
    fn test_build_arpabet_map_phoneme_count() {
        let arpabet = build_arpabet_map();
        assert!(phoneme_count(&arpabet) >= 30);
    }

    #[test]
    fn test_build_arpabet_map_viseme_count() {
        let arpabet = build_arpabet_map();
        assert!(viseme_count(&arpabet) >= 10);
    }

    #[test]
    fn test_phoneme_duration_default() {
        let table = empty_map();
        let fallback = phoneme_duration(&table, "AA");
        assert!((fallback - 80.0).abs() < 1e-5);
    }

    #[test]
    fn test_phoneme_map_to_json() {
        let arpabet = build_arpabet_map();
        let json = phoneme_map_to_json(&arpabet);
        for key in ["phoneme_count", "viseme_count", "mappings"] {
            assert!(json.contains(key));
        }
    }

    #[test]
    fn test_coarticulation_blend_at_zero() {
        let arpabet = build_arpabet_map();
        let blended = coarticulation_blend(&arpabet, "SIL", "AA", 0.0);
        assert_eq!(blended.len(), 5);
    }

    #[test]
    fn test_coarticulation_blend_at_one() {
        let arpabet = build_arpabet_map();
        let blended = coarticulation_blend(&arpabet, "SIL", "AA", 1.0);
        let target = viseme_to_morph_weights(&arpabet, "aa");
        for (got, want) in blended.iter().zip(target.iter()) {
            assert!((got - want).abs() < 1e-5);
        }
    }

    #[test]
    fn test_dominant_viseme_rest_is_zero() {
        let arpabet = build_arpabet_map();
        let peak = dominant_viseme(&arpabet, "rest");
        assert!(peak.abs() < 1e-6);
    }

    #[test]
    fn test_dominant_viseme_aa_positive() {
        let arpabet = build_arpabet_map();
        let peak = dominant_viseme(&arpabet, "aa");
        assert!(peak > 0.0);
    }

    #[test]
    fn test_arpabet_bilabial_maps_to_pp() {
        let arpabet = build_arpabet_map();
        for bilabial in ["B", "P", "M"] {
            assert_eq!(phoneme_to_viseme(&arpabet, bilabial), Some("pp"));
        }
    }
}