// oxihuman_morph/voice_driven_anim.rs
//! Voice/audio-driven animation (speech envelope → jaw, viseme blending).
2
3#[allow(dead_code)]
4pub struct AudioFrame {
5    pub time: f32,
6    pub amplitude: f32,
7    pub frequency: f32,
8    pub voiced: bool,
9}
10
11#[allow(dead_code)]
12pub struct JawCurve {
13    pub keys: Vec<(f32, f32)>,
14}
15
16#[allow(dead_code)]
17pub struct VoiceAnimConfig {
18    pub jaw_scale: f32,
19    pub jaw_smooth: f32,
20    pub min_amplitude: f32,
21    pub viseme_blend_time: f32,
22}
23
24#[allow(dead_code)]
25pub struct VoiceAnimResult {
26    pub jaw_curve: JawCurve,
27    pub viseme_weights: Vec<Vec<f32>>,
28    pub frame_times: Vec<f32>,
29}
30
31#[allow(dead_code)]
32pub fn default_voice_anim_config() -> VoiceAnimConfig {
33    VoiceAnimConfig {
34        jaw_scale: 0.8,
35        jaw_smooth: 0.05,
36        min_amplitude: 0.02,
37        viseme_blend_time: 0.08,
38    }
39}
40
41#[allow(dead_code)]
42pub fn amplitude_to_jaw(amplitude: f32, cfg: &VoiceAnimConfig) -> f32 {
43    let v = amplitude.clamp(0.0, 1.0) * cfg.jaw_scale;
44    v.clamp(0.0, 1.0)
45}
46
47#[allow(dead_code)]
48pub fn smooth_jaw_curve(curve: &JawCurve, window: f32) -> JawCurve {
49    if curve.keys.is_empty() {
50        return JawCurve { keys: Vec::new() };
51    }
52    let half = window * 0.5;
53    let keys: Vec<(f32, f32)> = curve
54        .keys
55        .iter()
56        .map(|&(t, _)| {
57            let mut sum = 0.0f32;
58            let mut count = 0u32;
59            for &(kt, kv) in &curve.keys {
60                if (kt - t).abs() <= half {
61                    sum += kv;
62                    count += 1;
63                }
64            }
65            (t, if count > 0 { sum / count as f32 } else { 0.0 })
66        })
67        .collect();
68    JawCurve { keys }
69}
70
71#[allow(dead_code)]
72pub fn audio_frames_to_jaw_curve(frames: &[AudioFrame], cfg: &VoiceAnimConfig) -> JawCurve {
73    let keys: Vec<(f32, f32)> = frames
74        .iter()
75        .map(|f| {
76            let jaw = if f.amplitude >= cfg.min_amplitude {
77                amplitude_to_jaw(f.amplitude, cfg)
78            } else {
79                0.0
80            };
81            (f.time, jaw)
82        })
83        .collect();
84    JawCurve { keys }
85}
86
87#[allow(dead_code)]
88pub fn sample_jaw_curve(curve: &JawCurve, time: f32) -> f32 {
89    if curve.keys.is_empty() {
90        return 0.0;
91    }
92    if curve.keys.len() == 1 {
93        return curve.keys[0].1;
94    }
95    let first = curve.keys[0];
96    let last = curve.keys[curve.keys.len() - 1];
97    if time <= first.0 {
98        return first.1;
99    }
100    if time >= last.0 {
101        return last.1;
102    }
103    for i in 0..curve.keys.len() - 1 {
104        let (t0, v0) = curve.keys[i];
105        let (t1, v1) = curve.keys[i + 1];
106        if time >= t0 && time <= t1 {
107            let span = t1 - t0;
108            if span < 1e-9 {
109                return v0;
110            }
111            let alpha = (time - t0) / span;
112            return v0 + (v1 - v0) * alpha;
113        }
114    }
115    last.1
116}
117
118#[allow(dead_code)]
119pub fn voiced_segments(frames: &[AudioFrame], min_amplitude: f32) -> Vec<(f32, f32)> {
120    let mut segments: Vec<(f32, f32)> = Vec::new();
121    let mut in_segment = false;
122    let mut seg_start = 0.0f32;
123
124    for frame in frames {
125        let active = frame.voiced && frame.amplitude >= min_amplitude;
126        if active && !in_segment {
127            in_segment = true;
128            seg_start = frame.time;
129        } else if !active && in_segment {
130            in_segment = false;
131            segments.push((seg_start, frame.time));
132        }
133    }
134    if in_segment {
135        if let Some(last) = frames.last() {
136            segments.push((seg_start, last.time));
137        }
138    }
139    segments
140}
141
142/// Map dominant frequency to one of 14 viseme indices.
143/// Rough mapping based on formant frequency ranges.
144#[allow(dead_code)]
145pub fn frequency_to_viseme_index(frequency: f32) -> usize {
146    // 14 viseme buckets across 80..3400 Hz range
147    let range_min = 80.0f32;
148    let range_max = 3400.0f32;
149    let clamped = frequency.clamp(range_min, range_max);
150    let normalized = (clamped - range_min) / (range_max - range_min);
151    let idx = (normalized * 13.9) as usize;
152    idx.min(13)
153}
154
155#[allow(dead_code)]
156pub fn frames_to_viseme_weights(frames: &[AudioFrame]) -> Vec<Vec<f32>> {
157    frames
158        .iter()
159        .map(|f| {
160            let mut weights = vec![0.0f32; 14];
161            let idx = frequency_to_viseme_index(f.frequency);
162            weights[idx] = f.amplitude.clamp(0.0, 1.0);
163            weights
164        })
165        .collect()
166}
167
168#[allow(dead_code)]
169pub fn jaw_curve_duration(curve: &JawCurve) -> f32 {
170    if curve.keys.len() < 2 {
171        return 0.0;
172    }
173    curve.keys.last().map(|k| k.0).unwrap_or(0.0) - curve.keys.first().map(|k| k.0).unwrap_or(0.0)
174}
175
176#[allow(dead_code)]
177pub fn jaw_curve_max(curve: &JawCurve) -> f32 {
178    curve.keys.iter().map(|k| k.1).fold(0.0f32, f32::max)
179}
180
181#[allow(dead_code)]
182pub fn blend_jaw_curves(a: &JawCurve, b: &JawCurve, t: f32) -> JawCurve {
183    let t = t.clamp(0.0, 1.0);
184    // Use keys from a, sample b at same times
185    let keys: Vec<(f32, f32)> = a
186        .keys
187        .iter()
188        .map(|&(time, va)| {
189            let vb = sample_jaw_curve(b, time);
190            (time, va + (vb - va) * t)
191        })
192        .collect();
193    JawCurve { keys }
194}
195
196#[allow(dead_code)]
197pub fn voice_anim_from_frames(frames: &[AudioFrame], cfg: &VoiceAnimConfig) -> VoiceAnimResult {
198    let jaw_curve_raw = audio_frames_to_jaw_curve(frames, cfg);
199    let jaw_curve = smooth_jaw_curve(&jaw_curve_raw, cfg.jaw_smooth);
200    let viseme_weights = frames_to_viseme_weights(frames);
201    let frame_times: Vec<f32> = frames.iter().map(|f| f.time).collect();
202    VoiceAnimResult {
203        jaw_curve,
204        viseme_weights,
205        frame_times,
206    }
207}
208
209#[allow(dead_code)]
210pub fn silence_duration(frames: &[AudioFrame], cfg: &VoiceAnimConfig) -> f32 {
211    frames
212        .iter()
213        .filter(|f| f.amplitude < cfg.min_amplitude)
214        .count() as f32
215        / frames.len().max(1) as f32
216        * jaw_curve_duration(&JawCurve {
217            keys: frames.iter().map(|f| (f.time, 0.0)).collect(),
218        })
219}
220
221#[cfg(test)]
222mod tests {
223    use super::*;
224
225    fn make_frames(n: usize) -> Vec<AudioFrame> {
226        (0..n)
227            .map(|i| AudioFrame {
228                time: i as f32 * 0.033,
229                amplitude: if i % 3 == 0 { 0.01 } else { 0.5 },
230                frequency: 200.0 + i as f32 * 50.0,
231                voiced: i % 3 != 0,
232            })
233            .collect()
234    }
235
236    #[test]
237    fn test_amplitude_to_jaw_in_range() {
238        let cfg = default_voice_anim_config();
239        let v = amplitude_to_jaw(0.5, &cfg);
240        assert!((0.0..=1.0).contains(&v));
241    }
242
243    #[test]
244    fn test_amplitude_to_jaw_zero() {
245        let cfg = default_voice_anim_config();
246        assert_eq!(amplitude_to_jaw(0.0, &cfg), 0.0);
247    }
248
249    #[test]
250    fn test_amplitude_to_jaw_max() {
251        let cfg = default_voice_anim_config();
252        let v = amplitude_to_jaw(1.0, &cfg);
253        assert!(v <= 1.0);
254    }
255
256    #[test]
257    fn test_audio_frames_to_jaw_curve_length() {
258        let cfg = default_voice_anim_config();
259        let frames = make_frames(10);
260        let curve = audio_frames_to_jaw_curve(&frames, &cfg);
261        assert_eq!(curve.keys.len(), 10);
262    }
263
264    #[test]
265    fn test_sample_at_t0() {
266        let curve = JawCurve {
267            keys: vec![(0.0, 0.3), (1.0, 0.8)],
268        };
269        let v = sample_jaw_curve(&curve, 0.0);
270        assert!((v - 0.3).abs() < 1e-6);
271    }
272
273    #[test]
274    fn test_sample_interpolation() {
275        let curve = JawCurve {
276            keys: vec![(0.0, 0.0), (1.0, 1.0)],
277        };
278        let v = sample_jaw_curve(&curve, 0.5);
279        assert!((v - 0.5).abs() < 1e-6);
280    }
281
282    #[test]
283    fn test_voiced_segments_count() {
284        let frames = vec![
285            AudioFrame {
286                time: 0.0,
287                amplitude: 0.5,
288                frequency: 200.0,
289                voiced: true,
290            },
291            AudioFrame {
292                time: 0.1,
293                amplitude: 0.5,
294                frequency: 200.0,
295                voiced: true,
296            },
297            AudioFrame {
298                time: 0.2,
299                amplitude: 0.01,
300                frequency: 200.0,
301                voiced: false,
302            },
303            AudioFrame {
304                time: 0.3,
305                amplitude: 0.5,
306                frequency: 200.0,
307                voiced: true,
308            },
309            AudioFrame {
310                time: 0.4,
311                amplitude: 0.5,
312                frequency: 200.0,
313                voiced: true,
314            },
315        ];
316        let segs = voiced_segments(&frames, 0.02);
317        assert_eq!(segs.len(), 2);
318    }
319
320    #[test]
321    fn test_frequency_to_viseme_index_valid() {
322        for freq in [100.0f32, 200.0, 500.0, 1000.0, 2000.0, 3000.0] {
323            let idx = frequency_to_viseme_index(freq);
324            assert!(idx < 14, "viseme index out of range for freq {}", freq);
325        }
326    }
327
328    #[test]
329    fn test_jaw_curve_max() {
330        let curve = JawCurve {
331            keys: vec![(0.0, 0.2), (0.5, 0.9), (1.0, 0.4)],
332        };
333        let m = jaw_curve_max(&curve);
334        assert!((m - 0.9).abs() < 1e-6);
335    }
336
337    #[test]
338    fn test_smooth_does_not_change_length() {
339        let cfg = default_voice_anim_config();
340        let frames = make_frames(20);
341        let curve = audio_frames_to_jaw_curve(&frames, &cfg);
342        let smoothed = smooth_jaw_curve(&curve, 0.05);
343        assert_eq!(smoothed.keys.len(), curve.keys.len());
344    }
345
346    #[test]
347    fn test_silence_duration_all_silent() {
348        let cfg = default_voice_anim_config();
349        let frames: Vec<AudioFrame> = (0..10)
350            .map(|i| AudioFrame {
351                time: i as f32 * 0.1,
352                amplitude: 0.0,
353                frequency: 200.0,
354                voiced: false,
355            })
356            .collect();
357        let sd = silence_duration(&frames, &cfg);
358        assert!(sd >= 0.0);
359    }
360
361    #[test]
362    fn test_frames_to_viseme_weights_length() {
363        let frames = make_frames(5);
364        let w = frames_to_viseme_weights(&frames);
365        assert_eq!(w.len(), 5);
366        for row in &w {
367            assert_eq!(row.len(), 14);
368        }
369    }
370
371    #[test]
372    fn test_jaw_curve_duration() {
373        let curve = JawCurve {
374            keys: vec![(0.0, 0.0), (0.5, 0.5), (1.5, 0.3)],
375        };
376        let d = jaw_curve_duration(&curve);
377        assert!((d - 1.5).abs() < 1e-6);
378    }
379
380    #[test]
381    fn test_blend_jaw_curves() {
382        let a = JawCurve {
383            keys: vec![(0.0, 0.0), (1.0, 1.0)],
384        };
385        let b = JawCurve {
386            keys: vec![(0.0, 1.0), (1.0, 0.0)],
387        };
388        let blended = blend_jaw_curves(&a, &b, 0.5);
389        assert!((blended.keys[0].1 - 0.5).abs() < 1e-6);
390        assert!((blended.keys[1].1 - 0.5).abs() < 1e-6);
391    }
392
393    #[test]
394    fn test_voice_anim_from_frames() {
395        let cfg = default_voice_anim_config();
396        let frames = make_frames(8);
397        let result = voice_anim_from_frames(&frames, &cfg);
398        assert_eq!(result.jaw_curve.keys.len(), 8);
399        assert_eq!(result.viseme_weights.len(), 8);
400        assert_eq!(result.frame_times.len(), 8);
401    }
402}