Skip to main content

oxihuman_morph/
speech_viseme.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! Speech viseme / lip-sync system for OxiHuman.
5//!
6//! Maps spoken phonemes (IPA-inspired, ARPAbet-compatible) to facial viseme
7//! morph-target weights for real-time animated lip synchronization.
8
9#![allow(dead_code)]
10
11use std::collections::HashMap;
12
13// ---------------------------------------------------------------------------
14// Phoneme
15// ---------------------------------------------------------------------------
16
/// English phonemes (IPA-inspired, ARPAbet subset).
///
/// Every variant is a fieldless tag, so the type is `Copy` and can be passed
/// around by value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Phoneme {
    // Silence
    /// Silence / pause.
    Silence,
    // Vowels
    /// Vowel as in "father".
    AA,
    /// Vowel as in "cat".
    AE,
    /// Vowel as in "but".
    AH,
    /// Vowel as in "bought".
    AO,
    /// Diphthong as in "cow".
    AW,
    /// Diphthong as in "my".
    AY,
    /// Vowel as in "bed".
    EH,
    /// R-colored vowel as in "bird".
    ER,
    /// Diphthong as in "bait".
    EY,
    /// Vowel as in "bit".
    IH,
    /// Vowel as in "beet".
    IY,
    /// Diphthong as in "bone".
    OW,
    /// Diphthong as in "boy".
    OY,
    /// Vowel as in "book".
    UH,
    /// Vowel as in "boot".
    UW,
    // Consonants
    /// As in "buy".
    B,
    /// As in "chin".
    CH,
    /// As in "die".
    D,
    /// As in "thy".
    DH,
    /// As in "fight".
    F,
    /// As in "guy".
    G,
    /// As in "high".
    HH,
    /// As in "jive".
    JH,
    /// As in "kite".
    K,
    /// As in "lie".
    L,
    /// As in "my".
    M,
    /// As in "nigh".
    N,
    /// As in "sing".
    NG,
    /// As in "pie".
    P,
    /// As in "rye".
    R,
    /// As in "sigh".
    S,
    /// As in "shy".
    SH,
    /// As in "tie".
    T,
    /// As in "thigh".
    TH,
    /// As in "vie".
    V,
    /// As in "wise".
    W,
    /// As in "yacht".
    Y,
    /// As in "zoo".
    Z,
    /// As in "pleasure".
    ZH,
}

impl Phoneme {
    /// All phonemes in a fixed order (silence, then vowels, then consonants).
    pub fn all() -> &'static [Phoneme] {
        &[
            Phoneme::Silence,
            Phoneme::AA,
            Phoneme::AE,
            Phoneme::AH,
            Phoneme::AO,
            Phoneme::AW,
            Phoneme::AY,
            Phoneme::EH,
            Phoneme::ER,
            Phoneme::EY,
            Phoneme::IH,
            Phoneme::IY,
            Phoneme::OW,
            Phoneme::OY,
            Phoneme::UH,
            Phoneme::UW,
            Phoneme::B,
            Phoneme::CH,
            Phoneme::D,
            Phoneme::DH,
            Phoneme::F,
            Phoneme::G,
            Phoneme::HH,
            Phoneme::JH,
            Phoneme::K,
            Phoneme::L,
            Phoneme::M,
            Phoneme::N,
            Phoneme::NG,
            Phoneme::P,
            Phoneme::R,
            Phoneme::S,
            Phoneme::SH,
            Phoneme::T,
            Phoneme::TH,
            Phoneme::V,
            Phoneme::W,
            Phoneme::Y,
            Phoneme::Z,
            Phoneme::ZH,
        ]
    }

    /// Human-readable name (ARPAbet string).
    ///
    /// [`Phoneme::from_arpabet`] accepts every string returned here, so the
    /// two functions round-trip.
    pub fn name(&self) -> &'static str {
        match self {
            Phoneme::Silence => "Silence",
            Phoneme::AA => "AA",
            Phoneme::AE => "AE",
            Phoneme::AH => "AH",
            Phoneme::AO => "AO",
            Phoneme::AW => "AW",
            Phoneme::AY => "AY",
            Phoneme::EH => "EH",
            Phoneme::ER => "ER",
            Phoneme::EY => "EY",
            Phoneme::IH => "IH",
            Phoneme::IY => "IY",
            Phoneme::OW => "OW",
            Phoneme::OY => "OY",
            Phoneme::UH => "UH",
            Phoneme::UW => "UW",
            Phoneme::B => "B",
            Phoneme::CH => "CH",
            Phoneme::D => "D",
            Phoneme::DH => "DH",
            Phoneme::F => "F",
            Phoneme::G => "G",
            Phoneme::HH => "HH",
            Phoneme::JH => "JH",
            Phoneme::K => "K",
            Phoneme::L => "L",
            Phoneme::M => "M",
            Phoneme::N => "N",
            Phoneme::NG => "NG",
            Phoneme::P => "P",
            Phoneme::R => "R",
            Phoneme::S => "S",
            Phoneme::SH => "SH",
            Phoneme::T => "T",
            Phoneme::TH => "TH",
            Phoneme::V => "V",
            Phoneme::W => "W",
            Phoneme::Y => "Y",
            Phoneme::Z => "Z",
            Phoneme::ZH => "ZH",
        }
    }

    /// Returns `true` for vowel phonemes.
    ///
    /// `Silence` is neither a vowel nor a consonant.
    pub fn is_vowel(&self) -> bool {
        matches!(
            self,
            Phoneme::AA
                | Phoneme::AE
                | Phoneme::AH
                | Phoneme::AO
                | Phoneme::AW
                | Phoneme::AY
                | Phoneme::EH
                | Phoneme::ER
                | Phoneme::EY
                | Phoneme::IH
                | Phoneme::IY
                | Phoneme::OW
                | Phoneme::OY
                | Phoneme::UH
                | Phoneme::UW
        )
    }

    /// Returns `true` for consonant phonemes.
    ///
    /// `Silence` is neither a vowel nor a consonant.
    pub fn is_consonant(&self) -> bool {
        matches!(
            self,
            Phoneme::B
                | Phoneme::CH
                | Phoneme::D
                | Phoneme::DH
                | Phoneme::F
                | Phoneme::G
                | Phoneme::HH
                | Phoneme::JH
                | Phoneme::K
                | Phoneme::L
                | Phoneme::M
                | Phoneme::N
                | Phoneme::NG
                | Phoneme::P
                | Phoneme::R
                | Phoneme::S
                | Phoneme::SH
                | Phoneme::T
                | Phoneme::TH
                | Phoneme::V
                | Phoneme::W
                | Phoneme::Y
                | Phoneme::Z
                | Phoneme::ZH
        )
    }

    /// Parse an ARPAbet string (ASCII case-insensitive) into a `Phoneme`.
    ///
    /// Accepted inputs:
    /// * every string returned by [`Phoneme::name`];
    /// * the silence aliases `"SIL"`, `"SP"` and `"_"`;
    /// * a vowel followed by a single CMUdict-style stress digit (`0`, `1`
    ///   or `2`), e.g. `"AH0"`; the stress digit is discarded.
    ///
    /// Returns `None` for anything else, including the empty string and a
    /// stress digit attached to a consonant or to silence.
    ///
    /// # Examples
    /// ```
    /// use oxihuman_morph::speech_viseme::Phoneme;
    /// assert_eq!(Phoneme::from_arpabet("AA"), Some(Phoneme::AA));
    /// assert_eq!(Phoneme::from_arpabet("sil"), Some(Phoneme::Silence));
    /// assert_eq!(Phoneme::from_arpabet("AH0"), Some(Phoneme::AH));
    /// ```
    pub fn from_arpabet(s: &str) -> Option<Phoneme> {
        const SILENCE_ALIASES: [&str; 3] = ["SIL", "SP", "_"];

        if SILENCE_ALIASES
            .iter()
            .any(|alias| s.eq_ignore_ascii_case(alias))
        {
            return Some(Phoneme::Silence);
        }

        // `name()` is the single source of truth for the symbol table; the
        // comparison is ASCII-only and allocation-free.
        let lookup = |symbol: &str| {
            Self::all()
                .iter()
                .copied()
                .find(|p| symbol.eq_ignore_ascii_case(p.name()))
        };

        if let Some(phoneme) = lookup(s) {
            return Some(phoneme);
        }

        // CMUdict marks lexical stress with a trailing 0/1/2 on vowels only.
        let base = s.strip_suffix(|c: char| matches!(c, '0'..='2'))?;
        lookup(base).filter(Phoneme::is_vowel)
    }
}
265
266// ---------------------------------------------------------------------------
267// Viseme
268// ---------------------------------------------------------------------------
269
/// A viseme: the canonical mouth shape associated with one or more phonemes.
///
/// Every variant is a fieldless tag, so the type is `Copy`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Viseme {
    /// Mouth closed (silence).
    Silence,
    /// Bilabial plosive/nasal: B, M, P.
    PP,
    /// Labiodental fricative: F, V.
    FF,
    /// Dental fricative: TH, DH.
    TH,
    /// Alveolar: D, T, N, L.
    DD,
    /// Velar: K, G, NG.
    KK,
    /// Palatal/affricate: CH, SH, ZH, JH.
    CH,
    /// Sibilant fricative: S, Z.
    SS,
    /// Open vowel: AA, AE, AH.
    Aa,
    /// Mid vowel: EH, AY, EY.
    E,
    /// Close-front vowel: IH, IY.
    I,
    /// Rounded mid vowel: OW, AO, OY.
    O,
    /// Close-back / rounded vowel: UH, UW, AW.
    U,
    /// Retroflex / rhotic: R, ER.
    RR,
    /// Mid-neutral: HH, W, Y.
    Neutral,
}

impl Viseme {
    /// All visemes in a fixed order (declaration order).
    pub fn all() -> &'static [Viseme] {
        &[
            Viseme::Silence,
            Viseme::PP,
            Viseme::FF,
            Viseme::TH,
            Viseme::DD,
            Viseme::KK,
            Viseme::CH,
            Viseme::SS,
            Viseme::Aa,
            Viseme::E,
            Viseme::I,
            Viseme::O,
            Viseme::U,
            Viseme::RR,
            Viseme::Neutral,
        ]
    }

    /// Human-readable name; identical to the variant identifier.
    pub fn name(&self) -> &'static str {
        match self {
            Viseme::Silence => "Silence",
            Viseme::PP => "PP",
            Viseme::FF => "FF",
            Viseme::TH => "TH",
            Viseme::DD => "DD",
            Viseme::KK => "KK",
            Viseme::CH => "CH",
            Viseme::SS => "SS",
            Viseme::Aa => "Aa",
            Viseme::E => "E",
            Viseme::I => "I",
            Viseme::O => "O",
            Viseme::U => "U",
            Viseme::RR => "RR",
            Viseme::Neutral => "Neutral",
        }
    }
}
348
349// ---------------------------------------------------------------------------
350// phoneme_to_viseme
351// ---------------------------------------------------------------------------
352
353/// Map a phoneme to its canonical viseme.
354pub fn phoneme_to_viseme(phoneme: &Phoneme) -> Viseme {
355    match phoneme {
356        // Silence
357        Phoneme::Silence => Viseme::Silence,
358        // Bilabial: B, M, P
359        Phoneme::B | Phoneme::M | Phoneme::P => Viseme::PP,
360        // Labiodental: F, V
361        Phoneme::F | Phoneme::V => Viseme::FF,
362        // Dental: TH, DH
363        Phoneme::TH | Phoneme::DH => Viseme::TH,
364        // Alveolar: D, T, N, L
365        Phoneme::D | Phoneme::T | Phoneme::N | Phoneme::L => Viseme::DD,
366        // Velar: K, G, NG
367        Phoneme::K | Phoneme::G | Phoneme::NG => Viseme::KK,
368        // Palatal/affricate: CH, SH, ZH, JH
369        Phoneme::CH | Phoneme::SH | Phoneme::ZH | Phoneme::JH => Viseme::CH,
370        // Sibilant: S, Z
371        Phoneme::S | Phoneme::Z => Viseme::SS,
372        // Open vowels: AA, AE, AH
373        Phoneme::AA | Phoneme::AE | Phoneme::AH => Viseme::Aa,
374        // Mid vowels: EH, AY (+ EY)
375        Phoneme::EH | Phoneme::AY | Phoneme::EY => Viseme::E,
376        // Rhotic (ER maps to RR as primary viseme)
377        Phoneme::ER => Viseme::RR,
378        // Close-front vowels: IH, IY
379        Phoneme::IH | Phoneme::IY => Viseme::I,
380        // Rounded mid vowels: OW, AO, OY
381        Phoneme::AO | Phoneme::OW | Phoneme::OY => Viseme::O,
382        // Close-back / rounded: UH, UW, AW
383        Phoneme::UH | Phoneme::UW | Phoneme::AW => Viseme::U,
384        // Retroflex: R
385        Phoneme::R => Viseme::RR,
386        // Mid-neutral: HH, W, Y
387        Phoneme::HH | Phoneme::W | Phoneme::Y => Viseme::Neutral,
388    }
389}
390
391// ---------------------------------------------------------------------------
392// VisemeMorphWeights / VisemeMapper
393// ---------------------------------------------------------------------------
394
395/// A set of morph-target name → weight pairs for one viseme.
396pub type VisemeMorphWeights = HashMap<String, f32>;
397
398/// Maps visemes to morph-target weight sets.
399pub struct VisemeMapper {
400    mappings: HashMap<Viseme, VisemeMorphWeights>,
401}
402
403impl VisemeMapper {
404    /// Create an empty mapper.
405    pub fn new() -> Self {
406        Self {
407            mappings: HashMap::new(),
408        }
409    }
410
411    /// Register or replace the morph weights for `viseme`.
412    pub fn set_viseme(&mut self, viseme: Viseme, weights: VisemeMorphWeights) {
413        self.mappings.insert(viseme, weights);
414    }
415
416    /// Return the morph weights for `viseme` (empty map if not registered).
417    pub fn get_weights(&self, viseme: &Viseme) -> VisemeMorphWeights {
418        self.mappings.get(viseme).cloned().unwrap_or_default()
419    }
420
421    /// Evaluate the morph weights for the viseme corresponding to `phoneme`.
422    pub fn evaluate_phoneme(&self, phoneme: &Phoneme) -> VisemeMorphWeights {
423        let viseme = phoneme_to_viseme(phoneme);
424        self.get_weights(&viseme)
425    }
426}
427
428impl Default for VisemeMapper {
429    fn default() -> Self {
430        Self::new()
431    }
432}
433
434// ---------------------------------------------------------------------------
435// default_viseme_mapper
436// ---------------------------------------------------------------------------
437
438/// Build a `VisemeMapper` pre-loaded with MakeHuman-style morph names and
439/// sensible default weights.
440pub fn default_viseme_mapper() -> VisemeMapper {
441    let mut mapper = VisemeMapper::new();
442
443    // Helper closure to build a weight map from key-value pairs.
444    let weights = |pairs: &[(&str, f32)]| -> VisemeMorphWeights {
445        pairs.iter().map(|(k, v)| (k.to_string(), *v)).collect()
446    };
447
448    // Silence — mouth fully closed, no tension.
449    mapper.set_viseme(Viseme::Silence, weights(&[("lips_closed", 1.0)]));
450
451    // PP — bilabial: B, M, P
452    mapper.set_viseme(
453        Viseme::PP,
454        weights(&[("lips_closed", 0.9), ("lips_press", 0.5)]),
455    );
456
457    // FF — labiodental: F, V
458    mapper.set_viseme(
459        Viseme::FF,
460        weights(&[("lower_lip_up", 0.6), ("upper_teeth_show", 0.5)]),
461    );
462
463    // TH — dental: TH, DH
464    mapper.set_viseme(
465        Viseme::TH,
466        weights(&[("lips_part", 0.4), ("tongue_tip_up", 0.7)]),
467    );
468
469    // DD — alveolar: D, T, N, L
470    mapper.set_viseme(
471        Viseme::DD,
472        weights(&[("lips_part", 0.3), ("jaw_drop", 0.15)]),
473    );
474
475    // KK — velar: K, G, NG
476    mapper.set_viseme(
477        Viseme::KK,
478        weights(&[("lips_part", 0.25), ("jaw_drop", 0.2)]),
479    );
480
481    // CH — palatal / affricate: CH, SH, ZH, JH
482    mapper.set_viseme(
483        Viseme::CH,
484        weights(&[("lips_round", 0.4), ("lips_part", 0.35), ("jaw_drop", 0.1)]),
485    );
486
487    // SS — sibilant: S, Z
488    mapper.set_viseme(
489        Viseme::SS,
490        weights(&[("lips_part", 0.2), ("teeth_show", 0.4)]),
491    );
492
493    // Aa — open vowel: AA, AE, AH
494    mapper.set_viseme(
495        Viseme::Aa,
496        weights(&[("jaw_drop", 0.7), ("lips_open", 0.8)]),
497    );
498
499    // E — mid vowel: EH, AY, EY
500    mapper.set_viseme(
501        Viseme::E,
502        weights(&[("lips_wide", 0.5), ("jaw_drop", 0.35), ("lips_open", 0.4)]),
503    );
504
505    // I — close-front vowel: IH, IY
506    mapper.set_viseme(Viseme::I, weights(&[("lips_wide", 0.6), ("jaw_drop", 0.2)]));
507
508    // O — rounded mid vowel: OW, AO, OY
509    mapper.set_viseme(
510        Viseme::O,
511        weights(&[("lips_round", 0.8), ("jaw_drop", 0.4)]),
512    );
513
514    // U — close-back / rounded: UH, UW, AW
515    mapper.set_viseme(
516        Viseme::U,
517        weights(&[("lips_round", 0.9), ("jaw_drop", 0.3), ("lips_pucker", 0.5)]),
518    );
519
520    // RR — retroflex: R, ER
521    mapper.set_viseme(
522        Viseme::RR,
523        weights(&[("lips_part", 0.35), ("jaw_drop", 0.25), ("lips_round", 0.3)]),
524    );
525
526    // Neutral — mid-neutral: HH, W, Y
527    mapper.set_viseme(
528        Viseme::Neutral,
529        weights(&[("lips_part", 0.15), ("jaw_drop", 0.1)]),
530    );
531
532    mapper
533}
534
535// ---------------------------------------------------------------------------
536// PhonemeEvent / LipSyncTrack
537// ---------------------------------------------------------------------------
538
539/// A single timed phoneme event in a lip-sync timeline.
540pub struct PhonemeEvent {
541    /// Start time in seconds.
542    pub start: f32,
543    /// End time in seconds.
544    pub end: f32,
545    /// The phoneme being spoken.
546    pub phoneme: Phoneme,
547    /// Amplitude / intensity in [0, 1].
548    pub intensity: f32,
549}
550
551/// A complete lip-sync track: an ordered sequence of `PhonemeEvent`s.
552pub struct LipSyncTrack {
553    /// Phoneme events sorted by start time.
554    pub events: Vec<PhonemeEvent>,
555    /// Total duration of the track in seconds.
556    pub duration: f32,
557}
558
559impl LipSyncTrack {
560    /// Create an empty track.
561    pub fn new() -> Self {
562        Self {
563            events: Vec::new(),
564            duration: 0.0,
565        }
566    }
567
568    /// Append a phoneme event, updating `duration` as needed.
569    pub fn add_event(&mut self, event: PhonemeEvent) {
570        if event.end > self.duration {
571            self.duration = event.end;
572        }
573        self.events.push(event);
574    }
575
576    /// Number of events in the track.
577    pub fn event_count(&self) -> usize {
578        self.events.len()
579    }
580
581    /// Evaluate morph weights at time `t`.
582    ///
583    /// Finds the active event (start ≤ t < end) and applies a short
584    /// coarticulation blend: in the last 0.05 s of an event the weights are
585    /// linearly interpolated toward the *next* event's weights.
586    pub fn evaluate(&self, t: f32, mapper: &VisemeMapper) -> VisemeMorphWeights {
587        // Locate the current event index.
588        let maybe_idx = self
589            .events
590            .iter()
591            .enumerate()
592            .find(|(_, ev)| ev.start <= t && t < ev.end)
593            .map(|(i, _)| i);
594
595        let Some(idx) = maybe_idx else {
596            // Outside all events — return silence.
597            return mapper.get_weights(&Viseme::Silence);
598        };
599
600        let current = &self.events[idx];
601        let current_weights = mapper.evaluate_phoneme(&current.phoneme);
602
603        // Scale by intensity.
604        let scale_weights = |w: &VisemeMorphWeights, scale: f32| -> VisemeMorphWeights {
605            w.iter().map(|(k, v)| (k.clone(), v * scale)).collect()
606        };
607
608        // Coarticulation blend: lerp toward next event in the last 0.05 s.
609        const BLEND_WINDOW: f32 = 0.05;
610        let time_left = current.end - t;
611
612        if time_left < BLEND_WINDOW {
613            if let Some(next) = self.events.get(idx + 1) {
614                let alpha = 1.0 - time_left / BLEND_WINDOW; // 0→1 as we approach end
615                let next_weights = mapper.evaluate_phoneme(&next.phoneme);
616
617                // Lerp: current * (1-alpha) + next * alpha, scaled by intensity.
618                let mut blended: VisemeMorphWeights = HashMap::new();
619
620                // Collect all keys.
621                let mut all_keys: std::collections::HashSet<&String> =
622                    std::collections::HashSet::new();
623                for k in current_weights.keys() {
624                    all_keys.insert(k);
625                }
626                for k in next_weights.keys() {
627                    all_keys.insert(k);
628                }
629
630                for key in all_keys {
631                    let cw = *current_weights.get(key).unwrap_or(&0.0);
632                    let nw = *next_weights.get(key).unwrap_or(&0.0);
633                    let lerped = cw * (1.0 - alpha) + nw * alpha;
634                    blended.insert(key.clone(), lerped * current.intensity);
635                }
636                return blended;
637            }
638        }
639
640        scale_weights(&current_weights, current.intensity)
641    }
642
643    /// Parse a simple phoneme timeline string.
644    ///
645    /// Format: `"0.0:AA 0.2:B 0.4:IY"`
646    ///
647    /// Each token is `<start>:<PHONEME>`.  The duration of each event is
648    /// inferred as the gap to the next token, or 0.1 s for the last token.
649    pub fn from_string(s: &str) -> Self {
650        let mut track = LipSyncTrack::new();
651
652        // Collect (start, phoneme) pairs.
653        let pairs: Vec<(f32, Phoneme)> = s
654            .split_whitespace()
655            .filter_map(|token| {
656                let (time_str, phon_str) = token.split_once(':')?;
657                let start: f32 = time_str.parse().ok()?;
658                let phoneme = Phoneme::from_arpabet(phon_str)?;
659                Some((start, phoneme))
660            })
661            .collect();
662
663        for (i, (start, phoneme)) in pairs.iter().enumerate() {
664            let end = pairs.get(i + 1).map(|(t, _)| *t).unwrap_or(start + 0.1);
665
666            track.add_event(PhonemeEvent {
667                start: *start,
668                end,
669                phoneme: phoneme.clone(),
670                intensity: 1.0,
671            });
672        }
673
674        track
675    }
676}
677
678impl Default for LipSyncTrack {
679    fn default() -> Self {
680        Self::new()
681    }
682}
683
684// ---------------------------------------------------------------------------
685// Tests
686// ---------------------------------------------------------------------------
687
#[cfg(test)]
mod tests {
    use super::*;

    /// `Phoneme::all` lists every variant exactly once.
    #[test]
    fn test_phoneme_all() {
        let all = Phoneme::all();
        // Must contain Silence and all 39 phonemes = 40 total.
        assert_eq!(all.len(), 40);
        assert!(all.contains(&Phoneme::Silence));
        assert!(all.contains(&Phoneme::AA));
        assert!(all.contains(&Phoneme::ZH));
    }

    /// Vowels are classified as vowels; consonants and silence are not.
    #[test]
    fn test_phoneme_is_vowel() {
        assert!(Phoneme::AA.is_vowel());
        assert!(Phoneme::IY.is_vowel());
        assert!(Phoneme::UW.is_vowel());
        assert!(Phoneme::ER.is_vowel());
        assert!(!Phoneme::B.is_vowel());
        assert!(!Phoneme::M.is_vowel());
        assert!(!Phoneme::Silence.is_vowel());
    }

    /// Consonants are classified as consonants; vowels and silence are not.
    #[test]
    fn test_phoneme_is_consonant() {
        assert!(Phoneme::B.is_consonant());
        assert!(Phoneme::ZH.is_consonant());
        assert!(Phoneme::NG.is_consonant());
        assert!(!Phoneme::AA.is_consonant());
        assert!(!Phoneme::Silence.is_consonant());
    }

    /// Parsing is case-insensitive, knows the silence aliases, and rejects
    /// unknown symbols.
    #[test]
    fn test_phoneme_from_arpabet() {
        assert_eq!(Phoneme::from_arpabet("AA"), Some(Phoneme::AA));
        assert_eq!(Phoneme::from_arpabet("aa"), Some(Phoneme::AA));
        assert_eq!(Phoneme::from_arpabet("sil"), Some(Phoneme::Silence));
        assert_eq!(Phoneme::from_arpabet("SIL"), Some(Phoneme::Silence));
        assert_eq!(Phoneme::from_arpabet("SP"), Some(Phoneme::Silence));
        assert_eq!(Phoneme::from_arpabet("ZH"), Some(Phoneme::ZH));
        assert_eq!(Phoneme::from_arpabet("NG"), Some(Phoneme::NG));
        assert_eq!(Phoneme::from_arpabet("NOPE"), None);
    }

    /// `Viseme::all` lists all 15 visemes.
    #[test]
    fn test_viseme_all() {
        let all = Viseme::all();
        assert_eq!(all.len(), 15);
        assert!(all.contains(&Viseme::Silence));
        assert!(all.contains(&Viseme::PP));
        assert!(all.contains(&Viseme::RR));
        assert!(all.contains(&Viseme::Neutral));
    }

    /// All bilabial consonants share the `PP` viseme.
    #[test]
    fn test_phoneme_to_viseme_bilabial() {
        assert_eq!(phoneme_to_viseme(&Phoneme::B), Viseme::PP);
        assert_eq!(phoneme_to_viseme(&Phoneme::M), Viseme::PP);
        assert_eq!(phoneme_to_viseme(&Phoneme::P), Viseme::PP);
    }

    /// Spot-check the vowel groups, plus the rhotic pair R / ER.
    #[test]
    fn test_phoneme_to_viseme_vowel() {
        assert_eq!(phoneme_to_viseme(&Phoneme::AA), Viseme::Aa);
        assert_eq!(phoneme_to_viseme(&Phoneme::AH), Viseme::Aa);
        assert_eq!(phoneme_to_viseme(&Phoneme::IH), Viseme::I);
        assert_eq!(phoneme_to_viseme(&Phoneme::IY), Viseme::I);
        assert_eq!(phoneme_to_viseme(&Phoneme::OW), Viseme::O);
        assert_eq!(phoneme_to_viseme(&Phoneme::UW), Viseme::U);
        assert_eq!(phoneme_to_viseme(&Phoneme::AW), Viseme::U);
        assert_eq!(phoneme_to_viseme(&Phoneme::R), Viseme::RR);
        assert_eq!(phoneme_to_viseme(&Phoneme::ER), Viseme::RR);
    }

    /// Silence maps to the silence viseme.
    #[test]
    fn test_phoneme_to_viseme_silence() {
        assert_eq!(phoneme_to_viseme(&Phoneme::Silence), Viseme::Silence);
    }

    /// The default mapper carries the expected morph targets and weights.
    #[test]
    fn test_viseme_mapper_default() {
        let mapper = default_viseme_mapper();
        // PP should have lips_closed and lips_press.
        let pp = mapper.get_weights(&Viseme::PP);
        assert!(pp.contains_key("lips_closed"));
        assert!(pp.contains_key("lips_press"));
        assert!((pp["lips_closed"] - 0.9).abs() < 1e-5);

        // Silence should close lips fully.
        let sil = mapper.get_weights(&Viseme::Silence);
        assert_eq!(sil["lips_closed"], 1.0);

        // Aa should have jaw_drop and lips_open.
        let aa = mapper.get_weights(&Viseme::Aa);
        assert!(aa.contains_key("jaw_drop"));
        assert!((aa["jaw_drop"] - 0.7).abs() < 1e-5);
    }

    /// `evaluate_phoneme` goes phoneme → viseme → registered weights.
    #[test]
    fn test_viseme_mapper_evaluate_phoneme() {
        let mapper = default_viseme_mapper();
        let weights = mapper.evaluate_phoneme(&Phoneme::B);
        // B → PP → lips_closed weight.
        assert!(weights.contains_key("lips_closed"));

        let weights_i = mapper.evaluate_phoneme(&Phoneme::IY);
        assert!(weights_i.contains_key("lips_wide"));

        let weights_u = mapper.evaluate_phoneme(&Phoneme::UW);
        assert!(weights_u.contains_key("lips_round"));
        assert!(weights_u.contains_key("lips_pucker"));
    }

    /// A new track is empty; adding an event extends the duration.
    #[test]
    fn test_lip_sync_track_new() {
        let track = LipSyncTrack::new();
        assert_eq!(track.event_count(), 0);
        assert_eq!(track.duration, 0.0);

        let mut track2 = LipSyncTrack::default();
        track2.add_event(PhonemeEvent {
            start: 0.0,
            end: 0.2,
            phoneme: Phoneme::AA,
            intensity: 1.0,
        });
        assert_eq!(track2.event_count(), 1);
        assert!((track2.duration - 0.2).abs() < 1e-6);
    }

    /// Evaluation inside an event yields that event's shape; evaluation
    /// outside every event yields silence.
    #[test]
    fn test_lip_sync_track_evaluate() {
        let mapper = default_viseme_mapper();
        let mut track = LipSyncTrack::new();

        track.add_event(PhonemeEvent {
            start: 0.0,
            end: 0.3,
            phoneme: Phoneme::AA,
            intensity: 1.0,
        });
        track.add_event(PhonemeEvent {
            start: 0.3,
            end: 0.6,
            phoneme: Phoneme::B,
            intensity: 0.8,
        });

        // At t=0.1 we should be well inside the AA event.
        let w = track.evaluate(0.1, &mapper);
        assert!(w.contains_key("jaw_drop") || w.contains_key("lips_open"));

        // At t=-0.1 (before track) — should return silence.
        let w_before = track.evaluate(-0.1, &mapper);
        assert!(w_before.contains_key("lips_closed") || w_before.is_empty());

        // At t=0.7 (after track) — should return silence.
        let w_after = track.evaluate(0.7, &mapper);
        assert!(w_after.contains_key("lips_closed") || w_after.is_empty());
    }

    /// The timeline parser infers each event's end from the next token's
    /// start, and gives the last event a 0.1 s duration.
    #[test]
    fn test_lip_sync_from_string() {
        let track = LipSyncTrack::from_string("0.0:AA 0.2:B 0.4:IY");
        assert_eq!(track.event_count(), 3);

        // First event: AA 0.0→0.2
        assert_eq!(track.events[0].phoneme, Phoneme::AA);
        assert!((track.events[0].start - 0.0).abs() < 1e-6);
        assert!((track.events[0].end - 0.2).abs() < 1e-6);

        // Second event: B 0.2→0.4
        assert_eq!(track.events[1].phoneme, Phoneme::B);
        assert!((track.events[1].end - 0.4).abs() < 1e-6);

        // Last event: IY 0.4→0.5 (inferred 0.1 s)
        assert_eq!(track.events[2].phoneme, Phoneme::IY);
        assert!((track.events[2].end - 0.5).abs() < 1e-6);

        // Duration should be 0.5.
        assert!((track.duration - 0.5).abs() < 1e-6);
    }

    /// `name()` returns the variant identifier for phonemes and visemes.
    #[test]
    fn test_phoneme_name() {
        assert_eq!(Phoneme::Silence.name(), "Silence");
        assert_eq!(Phoneme::AA.name(), "AA");
        assert_eq!(Phoneme::ZH.name(), "ZH");
        assert_eq!(Phoneme::NG.name(), "NG");
        assert_eq!(Phoneme::IY.name(), "IY");
        assert_eq!(Phoneme::B.name(), "B");

        // Viseme names
        assert_eq!(Viseme::Silence.name(), "Silence");
        assert_eq!(Viseme::PP.name(), "PP");
        assert_eq!(Viseme::Aa.name(), "Aa");
        assert_eq!(Viseme::RR.name(), "RR");
    }
}