/// One analysed slice of an audio signal; the input unit for every helper in
/// this file.
///
/// The type is plain data, so it derives the usual value-type traits
/// (`Debug`, `Clone`, `Copy`, `PartialEq`) to make it easy to log, duplicate
/// and compare in tests.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct AudioFrame {
    /// Timestamp of the frame. It is copied verbatim into jaw-curve keys and
    /// frame-time lists (presumably seconds — confirm with the producer).
    pub time: f32,
    /// Signal amplitude. Consumers clamp it to `[0, 1]` before turning it into
    /// a jaw opening or a viseme weight.
    pub amplitude: f32,
    /// Frequency estimate used to pick a viseme slot;
    /// `frequency_to_viseme_index` clamps it to `80.0..=3400.0`
    /// (presumably Hz — confirm with the producer).
    pub frequency: f32,
    /// Voicing flag. `voiced_segments` only treats a frame as active when this
    /// is `true` *and* the amplitude reaches the threshold.
    pub voiced: bool,
}
/// A keyframed jaw-opening curve.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Default` (an empty curve) so
/// curves can be logged, copied, compared and created without boilerplate.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct JawCurve {
    /// `(time, value)` pairs. `sample_jaw_curve` interpolates between
    /// neighbouring entries in stored order, so keys are expected to be sorted
    /// by ascending time.
    pub keys: Vec<(f32, f32)>,
}
/// Tunable parameters for converting audio frames into jaw/viseme animation.
///
/// Plain data made of four `f32` values, so it derives `Debug`, `Clone`,
/// `Copy` and `PartialEq`.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct VoiceAnimConfig {
    /// Multiplier applied to the clamped amplitude in `amplitude_to_jaw`.
    pub jaw_scale: f32,
    /// Width of the averaging window that `voice_anim_from_frames` passes to
    /// `smooth_jaw_curve` (same unit as the frame timestamps).
    pub jaw_smooth: f32,
    /// Amplitude threshold: frames below it produce a closed jaw and count as
    /// silence.
    pub min_amplitude: f32,
    /// Not read by any function in this file.
    /// NOTE(review): presumably a viseme cross-fade time used by a consumer
    /// elsewhere — confirm.
    pub viseme_blend_time: f32,
}
/// Output bundle returned by `voice_anim_from_frames`.
#[allow(dead_code)]
pub struct VoiceAnimResult {
/// Jaw-opening curve; `voice_anim_from_frames` stores the smoothed curve here.
pub jaw_curve: JawCurve,
/// One weight vector per input frame, as built by `frames_to_viseme_weights`.
pub viseme_weights: Vec<Vec<f32>>,
/// Timestamp of every input frame, in input order.
pub frame_times: Vec<f32>,
}
/// Returns the baseline configuration used when the caller has no overrides.
///
/// Values: jaw scale `0.8`, smoothing window `0.05`, silence threshold `0.02`,
/// viseme blend time `0.08`.
#[allow(dead_code)]
pub fn default_voice_anim_config() -> VoiceAnimConfig {
    const JAW_SCALE: f32 = 0.8;
    const JAW_SMOOTH: f32 = 0.05;
    const MIN_AMPLITUDE: f32 = 0.02;
    const VISEME_BLEND_TIME: f32 = 0.08;

    VoiceAnimConfig {
        jaw_scale: JAW_SCALE,
        jaw_smooth: JAW_SMOOTH,
        min_amplitude: MIN_AMPLITUDE,
        viseme_blend_time: VISEME_BLEND_TIME,
    }
}
/// Converts a signal amplitude into a jaw-opening value in `[0, 1]`.
///
/// The amplitude is first limited to `[0, 1]`, then multiplied by
/// `cfg.jaw_scale`, and the product is limited to `[0, 1]` again so that a
/// scale above one (or below zero) can never push the jaw out of range.
/// A NaN amplitude or scale yields NaN, because `clamp` passes NaN through.
#[allow(dead_code)]
pub fn amplitude_to_jaw(amplitude: f32, cfg: &VoiceAnimConfig) -> f32 {
    let bounded_amplitude = f32::clamp(amplitude, 0.0, 1.0);
    f32::clamp(bounded_amplitude * cfg.jaw_scale, 0.0, 1.0)
}
/// Returns a copy of `curve` in which every value is replaced by the mean of
/// all keys whose time lies within `window / 2` of that key's time
/// (a centred box filter). Key times are left untouched.
///
/// * `curve`  – source curve; it does not need to be sorted.
/// * `window` – full width of the averaging window, in the same unit as the
///   key times. A negative or NaN width is treated as `0`, i.e. each key is
///   only averaged with keys sharing exactly its own time.
///
/// A key whose time is not finite matches no neighbour (not even itself), so
/// its value is passed through unchanged instead of being zeroed.
///
/// Cost is quadratic in the number of keys because every key scans the whole
/// curve.
#[allow(dead_code)]
pub fn smooth_jaw_curve(curve: &JawCurve, window: f32) -> JawCurve {
    // `f32::max` returns the non-NaN operand, so both negative and NaN windows
    // collapse to a zero-width window rather than an "always false" comparison
    // that would wipe the curve.
    let half = (window * 0.5).max(0.0);

    let keys: Vec<(f32, f32)> = curve
        .keys
        .iter()
        .map(|&(t, own_value)| {
            let (sum, count) = curve
                .keys
                .iter()
                .filter(|&&(kt, _)| (kt - t).abs() <= half)
                .fold((0.0f32, 0u32), |(sum, count), &(_, kv)| (sum + kv, count + 1));
            let value = if count > 0 {
                sum / count as f32
            } else {
                // Only reachable for non-finite key times; keep the sample.
                own_value
            };
            (t, value)
        })
        .collect();

    JawCurve { keys }
}
/// Builds an unsmoothed jaw curve with one key per audio frame.
///
/// Each key keeps the frame's timestamp. Its value is
/// `amplitude_to_jaw(frame.amplitude, cfg)` when the amplitude reaches
/// `cfg.min_amplitude`, and `0.0` (closed jaw) otherwise — which includes
/// frames whose amplitude is NaN, since the comparison is then false.
#[allow(dead_code)]
pub fn audio_frames_to_jaw_curve(frames: &[AudioFrame], cfg: &VoiceAnimConfig) -> JawCurve {
    let mut keys: Vec<(f32, f32)> = Vec::with_capacity(frames.len());
    for frame in frames {
        let loud_enough = frame.amplitude >= cfg.min_amplitude;
        let opening = if loud_enough {
            amplitude_to_jaw(frame.amplitude, cfg)
        } else {
            0.0
        };
        keys.push((frame.time, opening));
    }
    JawCurve { keys }
}
/// Evaluates `curve` at `time` using linear interpolation between neighbours.
///
/// * An empty curve evaluates to `0.0`; a single-key curve to that key's value.
/// * Times at or before the first key return the first value; times at or
///   after the last key return the last value.
/// * Otherwise the first adjacent pair (in stored order) whose time range
///   contains `time` is interpolated. If the pair is closer together than
///   `1e-9` the earlier value is returned to avoid dividing by ~zero.
/// * If no pair contains `time` (e.g. `time` is NaN or the keys are unsorted)
///   the last value is returned.
#[allow(dead_code)]
pub fn sample_jaw_curve(curve: &JawCurve, time: f32) -> f32 {
    let keys = curve.keys.as_slice();
    let (head, tail) = match keys {
        [] => return 0.0,
        [only] => return only.1,
        [head, .., tail] => (*head, *tail),
    };

    if time <= head.0 {
        return head.1;
    }
    if time >= tail.0 {
        return tail.1;
    }

    keys.windows(2)
        .find(|pair| time >= pair[0].0 && time <= pair[1].0)
        .map(|pair| {
            let (t0, v0) = pair[0];
            let (t1, v1) = pair[1];
            let span = t1 - t0;
            if span < 1e-9 {
                v0
            } else {
                let alpha = (time - t0) / span;
                v0 + (v1 - v0) * alpha
            }
        })
        .unwrap_or(tail.1)
}
/// Finds contiguous runs of active frames and returns them as
/// `(start_time, end_time)` pairs.
///
/// A frame is active when it is flagged `voiced` and its amplitude is at
/// least `min_amplitude`. A run starts at the time of its first active frame
/// and ends at the time of the first inactive frame that follows it. A run
/// still open when the input ends is closed at the last frame's time.
#[allow(dead_code)]
pub fn voiced_segments(frames: &[AudioFrame], min_amplitude: f32) -> Vec<(f32, f32)> {
    let mut found: Vec<(f32, f32)> = Vec::new();
    // `Some(start)` while inside a run, `None` between runs.
    let mut open_since: Option<f32> = None;

    for frame in frames {
        let is_active = frame.voiced && frame.amplitude >= min_amplitude;
        match (open_since, is_active) {
            (None, true) => open_since = Some(frame.time),
            (Some(start), false) => {
                found.push((start, frame.time));
                open_since = None;
            }
            _ => {}
        }
    }

    if let (Some(start), Some(tail)) = (open_since, frames.last()) {
        found.push((start, tail.time));
    }
    found
}
/// Maps a frequency onto one of 14 viseme slots (`0..=13`).
///
/// The frequency is limited to `80.0..=3400.0` (presumably Hz — confirm with
/// the caller), normalised to `[0, 1]` across that band and scaled by `13.9`,
/// then truncated. The result is capped at `13` as a final guard. A NaN
/// frequency ends up in slot `0`, because a NaN-to-integer cast yields zero.
#[allow(dead_code)]
pub fn frequency_to_viseme_index(frequency: f32) -> usize {
    const BAND_MIN: f32 = 80.0;
    const BAND_MAX: f32 = 3400.0;
    const LAST_SLOT: usize = 13;

    let position = (frequency.clamp(BAND_MIN, BAND_MAX) - BAND_MIN) / (BAND_MAX - BAND_MIN);
    let slot = (position * 13.9) as usize;
    usize::min(slot, LAST_SLOT)
}
/// Produces one 14-element viseme weight vector per audio frame.
///
/// Every vector is all zeros except for the slot chosen by
/// `frequency_to_viseme_index(frame.frequency)`, which receives the frame's
/// amplitude limited to `[0, 1]`.
#[allow(dead_code)]
pub fn frames_to_viseme_weights(frames: &[AudioFrame]) -> Vec<Vec<f32>> {
    let mut rows: Vec<Vec<f32>> = Vec::with_capacity(frames.len());
    for frame in frames {
        let mut row = vec![0.0f32; 14];
        let slot = frequency_to_viseme_index(frame.frequency);
        row[slot] = frame.amplitude.clamp(0.0, 1.0);
        rows.push(row);
    }
    rows
}
/// Returns the time span covered by `curve`: last key time minus first key
/// time. Curves with fewer than two keys have a duration of `0.0`.
///
/// The keys are taken in stored order, so the result is only a true duration
/// when the curve is sorted by time.
#[allow(dead_code)]
pub fn jaw_curve_duration(curve: &JawCurve) -> f32 {
    match curve.keys.as_slice() {
        [(start, _), .., (end, _)] => end - start,
        _ => 0.0,
    }
}
/// Returns the largest key value in `curve`, never less than `0.0`.
///
/// The search starts from `0.0`, so an empty curve or one containing only
/// negative values yields `0.0`. NaN values are skipped because `f32::max`
/// prefers the non-NaN operand.
#[allow(dead_code)]
pub fn jaw_curve_max(curve: &JawCurve) -> f32 {
    let mut peak = 0.0f32;
    for &(_, value) in &curve.keys {
        peak = peak.max(value);
    }
    peak
}
/// Linearly blends curve `a` towards curve `b`.
///
/// The result has exactly the key times of `a`. For each of them `b` is
/// evaluated with `sample_jaw_curve`, and the output value is
/// `a + (b - a) * t`, where `t` is first limited to `[0, 1]`
/// (`0` gives `a`, `1` gives `b` sampled at `a`'s times).
#[allow(dead_code)]
pub fn blend_jaw_curves(a: &JawCurve, b: &JawCurve, t: f32) -> JawCurve {
    let mix = t.clamp(0.0, 1.0);
    let mut keys: Vec<(f32, f32)> = Vec::with_capacity(a.keys.len());
    for &(time, from) in &a.keys {
        let to = sample_jaw_curve(b, time);
        keys.push((time, from + (to - from) * mix));
    }
    JawCurve { keys }
}
/// Runs the full conversion from audio frames to animation data.
///
/// Steps: build the raw jaw curve with `audio_frames_to_jaw_curve`, smooth it
/// with `smooth_jaw_curve` using `cfg.jaw_smooth` as the window, derive
/// per-frame viseme weights with `frames_to_viseme_weights`, and record every
/// frame's timestamp.
#[allow(dead_code)]
pub fn voice_anim_from_frames(frames: &[AudioFrame], cfg: &VoiceAnimConfig) -> VoiceAnimResult {
    let raw_curve = audio_frames_to_jaw_curve(frames, cfg);
    VoiceAnimResult {
        jaw_curve: smooth_jaw_curve(&raw_curve, cfg.jaw_smooth),
        viseme_weights: frames_to_viseme_weights(frames),
        frame_times: frames.iter().map(|frame| frame.time).collect(),
    }
}
#[allow(dead_code)]
pub fn silence_duration(frames: &[AudioFrame], cfg: &VoiceAnimConfig) -> f32 {
frames
.iter()
.filter(|f| f.amplitude < cfg.min_amplitude)
.count() as f32
/ frames.len().max(1) as f32
* jaw_curve_duration(&JawCurve {
keys: frames.iter().map(|f| (f.time, 0.0)).collect(),
})
}
// Unit tests for the voice-animation helpers defined above.
#[cfg(test)]
mod tests {
use super::*;
// Builds `n` synthetic frames spaced 0.033 apart. Every third frame
// (i % 3 == 0) is quiet (amplitude 0.01) and unvoiced; the others have
// amplitude 0.5 and are voiced. Frequency rises by 50 per frame from 200.
fn make_frames(n: usize) -> Vec<AudioFrame> {
(0..n)
.map(|i| AudioFrame {
time: i as f32 * 0.033,
amplitude: if i % 3 == 0 { 0.01 } else { 0.5 },
frequency: 200.0 + i as f32 * 50.0,
voiced: i % 3 != 0,
})
.collect()
}
// A mid-range amplitude maps to a jaw value inside [0, 1].
#[test]
fn test_amplitude_to_jaw_in_range() {
let cfg = default_voice_anim_config();
let v = amplitude_to_jaw(0.5, &cfg);
assert!((0.0..=1.0).contains(&v));
}
// Zero amplitude keeps the jaw fully closed.
#[test]
fn test_amplitude_to_jaw_zero() {
let cfg = default_voice_anim_config();
assert_eq!(amplitude_to_jaw(0.0, &cfg), 0.0);
}
// Full amplitude never exceeds the upper bound of 1.0.
#[test]
fn test_amplitude_to_jaw_max() {
let cfg = default_voice_anim_config();
let v = amplitude_to_jaw(1.0, &cfg);
assert!(v <= 1.0);
}
// The raw jaw curve has exactly one key per input frame.
#[test]
fn test_audio_frames_to_jaw_curve_length() {
let cfg = default_voice_anim_config();
let frames = make_frames(10);
let curve = audio_frames_to_jaw_curve(&frames, &cfg);
assert_eq!(curve.keys.len(), 10);
}
// Sampling at the first key's time returns the first key's value.
#[test]
fn test_sample_at_t0() {
let curve = JawCurve {
keys: vec![(0.0, 0.3), (1.0, 0.8)],
};
let v = sample_jaw_curve(&curve, 0.0);
assert!((v - 0.3).abs() < 1e-6);
}
// Sampling halfway between two keys returns the midpoint value.
#[test]
fn test_sample_interpolation() {
let curve = JawCurve {
keys: vec![(0.0, 0.0), (1.0, 1.0)],
};
let v = sample_jaw_curve(&curve, 0.5);
assert!((v - 0.5).abs() < 1e-6);
}
// Two voiced runs separated by one quiet, unvoiced frame give two segments.
#[test]
fn test_voiced_segments_count() {
let frames = vec![
AudioFrame {
time: 0.0,
amplitude: 0.5,
frequency: 200.0,
voiced: true,
},
AudioFrame {
time: 0.1,
amplitude: 0.5,
frequency: 200.0,
voiced: true,
},
AudioFrame {
time: 0.2,
amplitude: 0.01,
frequency: 200.0,
voiced: false,
},
AudioFrame {
time: 0.3,
amplitude: 0.5,
frequency: 200.0,
voiced: true,
},
AudioFrame {
time: 0.4,
amplitude: 0.5,
frequency: 200.0,
voiced: true,
},
];
let segs = voiced_segments(&frames, 0.02);
assert_eq!(segs.len(), 2);
}
// Every sampled frequency maps to a slot below 14.
#[test]
fn test_frequency_to_viseme_index_valid() {
for freq in [100.0f32, 200.0, 500.0, 1000.0, 2000.0, 3000.0] {
let idx = frequency_to_viseme_index(freq);
assert!(idx < 14, "viseme index out of range for freq {}", freq);
}
}
// The maximum is the largest key value, regardless of its position.
#[test]
fn test_jaw_curve_max() {
let curve = JawCurve {
keys: vec![(0.0, 0.2), (0.5, 0.9), (1.0, 0.4)],
};
let m = jaw_curve_max(&curve);
assert!((m - 0.9).abs() < 1e-6);
}
// Smoothing rewrites values only; the number of keys is unchanged.
#[test]
fn test_smooth_does_not_change_length() {
let cfg = default_voice_anim_config();
let frames = make_frames(20);
let curve = audio_frames_to_jaw_curve(&frames, &cfg);
let smoothed = smooth_jaw_curve(&curve, 0.05);
assert_eq!(smoothed.keys.len(), curve.keys.len());
}
// With every frame silent the reported silence is non-negative. Only the
// sign is checked here, not the exact duration.
#[test]
fn test_silence_duration_all_silent() {
let cfg = default_voice_anim_config();
let frames: Vec<AudioFrame> = (0..10)
.map(|i| AudioFrame {
time: i as f32 * 0.1,
amplitude: 0.0,
frequency: 200.0,
voiced: false,
})
.collect();
let sd = silence_duration(&frames, &cfg);
assert!(sd >= 0.0);
}
// One weight row per frame, each with 14 entries.
#[test]
fn test_frames_to_viseme_weights_length() {
let frames = make_frames(5);
let w = frames_to_viseme_weights(&frames);
assert_eq!(w.len(), 5);
for row in &w {
assert_eq!(row.len(), 14);
}
}
// Duration is the last key time minus the first key time.
#[test]
fn test_jaw_curve_duration() {
let curve = JawCurve {
keys: vec![(0.0, 0.0), (0.5, 0.5), (1.5, 0.3)],
};
let d = jaw_curve_duration(&curve);
assert!((d - 1.5).abs() < 1e-6);
}
// A 50/50 blend of a rising and a falling ramp is flat at 0.5.
#[test]
fn test_blend_jaw_curves() {
let a = JawCurve {
keys: vec![(0.0, 0.0), (1.0, 1.0)],
};
let b = JawCurve {
keys: vec![(0.0, 1.0), (1.0, 0.0)],
};
let blended = blend_jaw_curves(&a, &b, 0.5);
assert!((blended.keys[0].1 - 0.5).abs() < 1e-6);
assert!((blended.keys[1].1 - 0.5).abs() < 1e-6);
}
// The full pipeline emits one entry per frame in all three outputs.
#[test]
fn test_voice_anim_from_frames() {
let cfg = default_voice_anim_config();
let frames = make_frames(8);
let result = voice_anim_from_frames(&frames, &cfg);
assert_eq!(result.jaw_curve.keys.len(), 8);
assert_eq!(result.viseme_weights.len(), 8);
assert_eq!(result.frame_times.len(), 8);
}
}