oxihuman_morph/
speech_prosody.rs

// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
// SPDX-License-Identifier: Apache-2.0

//! Emotional prosody analysis and generation for speech parameters.
//! Maps pitch, rate, emphasis and emotion to morph parameters.
6
/// Acoustic prosody measurements describing a stretch of speech.
///
/// The struct performs no validation on its own; `normalize_prosody` clamps
/// each field to the range quoted on the field below. Two feature sets compare
/// equal when all eight fields are equal (so any `NaN` field makes them differ).
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
pub struct ProsodyFeatures {
    /// Pitch in Hz; `normalize_prosody` clamps it to 50..=600.
    pub pitch_hz: f32,
    /// Spread of the pitch; `normalize_prosody` clamps it to 0..=300.
    /// NOTE(review): presumably also in Hz, confirm with the feature extractor.
    pub pitch_range: f32,
    /// Speaking rate; `normalize_prosody` clamps it to 0.1..=10.
    /// NOTE(review): unit is not stated in this file (syllables per second?).
    pub speech_rate: f32,
    /// Loudness; `normalize_prosody` clamps it to 0..=1.
    pub loudness: f32,
    /// Energy; `normalize_prosody` clamps it to 0..=1.
    pub energy: f32,
    /// Jitter; `normalize_prosody` clamps it to 0..=1.
    pub jitter: f32,
    /// Shimmer; `normalize_prosody` clamps it to 0..=1.
    pub shimmer: f32,
    /// Pause ratio; `normalize_prosody` clamps it to 0..=1.
    pub pause_ratio: f32,
}
19
/// Emotion labels that the prosody classifier and generator work with.
///
/// The enum is fieldless, so it also implements `Eq` and `Hash` and can be
/// used directly as a `HashMap` / `HashSet` key.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ProsodyEmotion {
    /// Baseline label; the generator returns fixed values for it.
    Neutral,
    Happy,
    Sad,
    Angry,
    Fearful,
    Disgusted,
    Surprised,
    Calm,
}
32
33#[allow(dead_code)]
34#[derive(Debug, Clone)]
35pub struct ProsodyProfile {
36    pub emotion: ProsodyEmotion,
37    pub intensity: f32,
38    pub features: ProsodyFeatures,
39}
40
41/// Rule-based classifier: maps prosody features to an emotion profile.
42#[allow(dead_code)]
43pub fn classify_prosody_emotion(features: &ProsodyFeatures) -> ProsodyProfile {
44    // Simple rule-based scoring for each emotion
45    let mut scores = [
46        (ProsodyEmotion::Neutral, 0.0_f32),
47        (ProsodyEmotion::Happy, 0.0_f32),
48        (ProsodyEmotion::Sad, 0.0_f32),
49        (ProsodyEmotion::Angry, 0.0_f32),
50        (ProsodyEmotion::Fearful, 0.0_f32),
51        (ProsodyEmotion::Disgusted, 0.0_f32),
52        (ProsodyEmotion::Surprised, 0.0_f32),
53        (ProsodyEmotion::Calm, 0.0_f32),
54    ];
55
56    // Happy: high pitch, very high rate (>5), high loudness, low jitter, low pause
57    scores[1].1 += if features.pitch_hz > 200.0 { 1.0 } else { 0.0 };
58    scores[1].1 += if features.speech_rate > 5.0 {
59        1.5
60    } else if features.speech_rate > 4.0 {
61        0.5
62    } else {
63        0.0
64    };
65    scores[1].1 += if features.loudness > 0.6 { 1.0 } else { 0.0 };
66    scores[1].1 += if features.pause_ratio < 0.15 {
67        0.5
68    } else {
69        0.0
70    };
71    scores[1].1 += if features.jitter < 0.03 { 0.5 } else { 0.0 }; // happy has low irregularity
72
73    // Sad: low pitch, low rate, low loudness, high pause_ratio, high jitter
74    scores[2].1 += if features.pitch_hz < 150.0 { 1.0 } else { 0.0 };
75    scores[2].1 += if features.speech_rate < 2.5 { 1.0 } else { 0.0 };
76    scores[2].1 += if features.loudness < 0.4 { 1.0 } else { 0.0 };
77    scores[2].1 += if features.pause_ratio > 0.4 { 1.0 } else { 0.0 };
78    scores[2].1 += if features.jitter > 0.05 { 0.5 } else { 0.0 };
79
80    // Angry: high pitch_range, high energy, very low pause_ratio (<0.1), high loudness, moderate speech rate
81    scores[3].1 += if features.pitch_range > 80.0 {
82        1.0
83    } else {
84        0.0
85    };
86    scores[3].1 += if features.energy > 0.7 { 1.0 } else { 0.0 };
87    scores[3].1 += if features.loudness > 0.7 { 1.0 } else { 0.0 };
88    scores[3].1 += if features.pause_ratio < 0.1 { 1.0 } else { 0.0 }; // angry pauses less
89    scores[3].1 += if features.speech_rate < 5.2 && features.speech_rate > 3.5 {
90        0.5
91    } else {
92        0.0
93    };
94
95    // Fearful: high jitter, high shimmer, moderate pitch, high rate
96    scores[4].1 += if features.jitter > 0.06 { 1.0 } else { 0.0 };
97    scores[4].1 += if features.shimmer > 0.06 { 1.0 } else { 0.0 };
98    scores[4].1 += if features.speech_rate > 4.5 { 0.5 } else { 0.0 };
99    scores[4].1 += if features.pause_ratio > 0.3 { 0.5 } else { 0.0 };
100
101    // Disgusted: low pitch, low rate, moderate jitter
102    scores[5].1 += if features.pitch_hz < 160.0 { 0.5 } else { 0.0 };
103    scores[5].1 += if features.speech_rate < 3.0 { 0.5 } else { 0.0 };
104    scores[5].1 += if features.jitter > 0.04 { 0.5 } else { 0.0 };
105    scores[5].1 += if features.shimmer > 0.04 { 0.5 } else { 0.0 };
106
107    // Surprised: high pitch, wide pitch_range, high rate
108    scores[6].1 += if features.pitch_hz > 220.0 { 1.0 } else { 0.0 };
109    scores[6].1 += if features.pitch_range > 100.0 {
110        1.0
111    } else {
112        0.0
113    };
114    scores[6].1 += if features.speech_rate > 5.0 { 0.5 } else { 0.0 };
115
116    // Calm: low jitter, low shimmer, low energy, moderate pause ratio, NOT too fast
117    scores[7].1 += if features.jitter < 0.02 { 1.0 } else { 0.0 };
118    scores[7].1 += if features.shimmer < 0.02 { 1.0 } else { 0.0 };
119    scores[7].1 += if features.energy < 0.45 {
120        1.0
121    } else if features.energy < 0.55 {
122        0.3
123    } else {
124        0.0
125    };
126    scores[7].1 += if features.pause_ratio > 0.2 && features.pause_ratio < 0.4 {
127        0.5
128    } else {
129        0.0
130    };
131
132    // Neutral: moderate values across the board — boosted by proximity to midpoints
133    let pitch_neutral = if (features.pitch_hz - 160.0).abs() < 20.0 {
134        1.2
135    } else {
136        0.0
137    };
138    let rate_neutral = if (features.speech_rate - 3.5).abs() < 0.5 {
139        1.2
140    } else {
141        0.0
142    };
143    let loudness_neutral = if (features.loudness - 0.5).abs() < 0.1 {
144        0.8
145    } else {
146        0.0
147    };
148    let energy_neutral = if (features.energy - 0.5).abs() < 0.1 {
149        0.5
150    } else {
151        0.0
152    };
153    scores[0].1 = pitch_neutral + rate_neutral + loudness_neutral + energy_neutral;
154
155    let best = scores
156        .iter()
157        .enumerate()
158        .max_by(|a, b| {
159            a.1 .1
160                .partial_cmp(&b.1 .1)
161                .unwrap_or(std::cmp::Ordering::Equal)
162        })
163        .map(|(i, _)| i)
164        .unwrap_or(0);
165
166    let total: f32 = scores.iter().map(|s| s.1).sum();
167    let intensity = if total > 0.0 {
168        (scores[best].1 / total).clamp(0.0, 1.0)
169    } else {
170        0.5
171    };
172
173    ProsodyProfile {
174        emotion: scores[best].0.clone(),
175        intensity,
176        features: features.clone(),
177    }
178}
179
180/// Generate canonical prosody features for a given emotion and intensity.
181#[allow(dead_code)]
182pub fn generate_prosody_for_emotion(emotion: &ProsodyEmotion, intensity: f32) -> ProsodyFeatures {
183    let t = intensity.clamp(0.0, 1.0);
184    let lerp = |a: f32, b: f32| a + (b - a) * t;
185
186    match emotion {
187        ProsodyEmotion::Neutral => ProsodyFeatures {
188            pitch_hz: 160.0,
189            pitch_range: 40.0,
190            speech_rate: 3.5,
191            loudness: 0.5,
192            energy: 0.5,
193            jitter: 0.01,
194            shimmer: 0.01,
195            pause_ratio: 0.25,
196        },
197        ProsodyEmotion::Happy => ProsodyFeatures {
198            pitch_hz: lerp(160.0, 230.0),
199            pitch_range: lerp(40.0, 110.0),
200            speech_rate: lerp(3.5, 5.5),
201            loudness: lerp(0.5, 0.85),
202            energy: lerp(0.5, 0.8),
203            jitter: lerp(0.01, 0.02),
204            shimmer: lerp(0.01, 0.02),
205            pause_ratio: lerp(0.25, 0.1),
206        },
207        ProsodyEmotion::Sad => ProsodyFeatures {
208            pitch_hz: lerp(160.0, 120.0),
209            pitch_range: lerp(40.0, 20.0),
210            speech_rate: lerp(3.5, 1.8),
211            loudness: lerp(0.5, 0.25),
212            energy: lerp(0.5, 0.2),
213            jitter: lerp(0.01, 0.08),
214            shimmer: lerp(0.01, 0.07),
215            pause_ratio: lerp(0.25, 0.55),
216        },
217        ProsodyEmotion::Angry => ProsodyFeatures {
218            pitch_hz: lerp(160.0, 200.0),
219            pitch_range: lerp(40.0, 120.0),
220            speech_rate: lerp(3.5, 5.0),
221            loudness: lerp(0.5, 0.95),
222            energy: lerp(0.5, 0.9),
223            jitter: lerp(0.01, 0.04),
224            shimmer: lerp(0.01, 0.05),
225            pause_ratio: lerp(0.25, 0.08),
226        },
227        ProsodyEmotion::Fearful => ProsodyFeatures {
228            pitch_hz: lerp(160.0, 210.0),
229            pitch_range: lerp(40.0, 90.0),
230            speech_rate: lerp(3.5, 5.5),
231            loudness: lerp(0.5, 0.6),
232            energy: lerp(0.5, 0.55),
233            jitter: lerp(0.01, 0.09),
234            shimmer: lerp(0.01, 0.08),
235            pause_ratio: lerp(0.25, 0.4),
236        },
237        ProsodyEmotion::Disgusted => ProsodyFeatures {
238            pitch_hz: lerp(160.0, 140.0),
239            pitch_range: lerp(40.0, 30.0),
240            speech_rate: lerp(3.5, 2.5),
241            loudness: lerp(0.5, 0.45),
242            energy: lerp(0.5, 0.4),
243            jitter: lerp(0.01, 0.06),
244            shimmer: lerp(0.01, 0.06),
245            pause_ratio: lerp(0.25, 0.35),
246        },
247        ProsodyEmotion::Surprised => ProsodyFeatures {
248            pitch_hz: lerp(160.0, 250.0),
249            pitch_range: lerp(40.0, 130.0),
250            speech_rate: lerp(3.5, 5.8),
251            loudness: lerp(0.5, 0.8),
252            energy: lerp(0.5, 0.75),
253            jitter: lerp(0.01, 0.03),
254            shimmer: lerp(0.01, 0.03),
255            pause_ratio: lerp(0.25, 0.12),
256        },
257        ProsodyEmotion::Calm => ProsodyFeatures {
258            pitch_hz: lerp(160.0, 155.0),
259            pitch_range: lerp(40.0, 20.0),
260            speech_rate: lerp(3.5, 2.8),
261            loudness: lerp(0.5, 0.35),
262            energy: lerp(0.5, 0.3),
263            jitter: lerp(0.01, 0.005),
264            shimmer: lerp(0.01, 0.005),
265            pause_ratio: lerp(0.25, 0.35),
266        },
267    }
268}
269
270/// Map prosody features to jaw/brow/lip morph parameters.
271#[allow(dead_code)]
272pub fn prosody_to_face_params(
273    features: &ProsodyFeatures,
274) -> std::collections::HashMap<String, f32> {
275    let mut map = std::collections::HashMap::new();
276
277    // Jaw open: driven by loudness and energy
278    let jaw_open = (features.loudness * 0.6 + features.energy * 0.4).clamp(0.0, 1.0);
279    map.insert("jaw_open".to_string(), jaw_open);
280
281    // Lip corners up: high pitch and happy-like features
282    let lip_corner_up = ((features.pitch_hz - 100.0) / 200.0).clamp(0.0, 1.0);
283    map.insert("lip_corner_up".to_string(), lip_corner_up);
284
285    // Brow raise: high pitch range and surprised features
286    let brow_raise = (features.pitch_range / 150.0).clamp(0.0, 1.0);
287    map.insert("brow_raise".to_string(), brow_raise);
288
289    // Brow furrow: high jitter (distress) and low pause ratio
290    let brow_furrow = (features.jitter * 5.0 + (1.0 - features.pause_ratio) * 0.2).clamp(0.0, 1.0);
291    map.insert("brow_furrow".to_string(), brow_furrow);
292
293    // Lip press: high energy maps to tighter lips
294    let lip_press = (features.energy * 0.5).clamp(0.0, 1.0);
295    map.insert("lip_press".to_string(), lip_press);
296
297    // Lip stretch (wide mouth): high speech rate
298    let lip_stretch = ((features.speech_rate - 2.0) / 5.0).clamp(0.0, 1.0);
299    map.insert("lip_stretch".to_string(), lip_stretch);
300
301    // Cheek raise: loudness
302    let cheek_raise = (features.loudness * 0.7).clamp(0.0, 1.0);
303    map.insert("cheek_raise".to_string(), cheek_raise);
304
305    map
306}
307
308/// Linear interpolation between two prosody feature sets.
309#[allow(dead_code)]
310pub fn interpolate_prosody(a: &ProsodyFeatures, b: &ProsodyFeatures, t: f32) -> ProsodyFeatures {
311    let t = t.clamp(0.0, 1.0);
312    let lerp = |x: f32, y: f32| x + (y - x) * t;
313    ProsodyFeatures {
314        pitch_hz: lerp(a.pitch_hz, b.pitch_hz),
315        pitch_range: lerp(a.pitch_range, b.pitch_range),
316        speech_rate: lerp(a.speech_rate, b.speech_rate),
317        loudness: lerp(a.loudness, b.loudness),
318        energy: lerp(a.energy, b.energy),
319        jitter: lerp(a.jitter, b.jitter),
320        shimmer: lerp(a.shimmer, b.shimmer),
321        pause_ratio: lerp(a.pause_ratio, b.pause_ratio),
322    }
323}
324
325/// Weighted blend of multiple emotion features.
326#[allow(dead_code)]
327pub fn blend_prosody_emotions(emotions: &[(ProsodyEmotion, f32)]) -> ProsodyFeatures {
328    if emotions.is_empty() {
329        return generate_prosody_for_emotion(&ProsodyEmotion::Neutral, 0.5);
330    }
331
332    let total_weight: f32 = emotions.iter().map(|(_, w)| w.max(0.0)).sum();
333    if total_weight <= 0.0 {
334        return generate_prosody_for_emotion(&ProsodyEmotion::Neutral, 0.5);
335    }
336
337    let mut result = ProsodyFeatures {
338        pitch_hz: 0.0,
339        pitch_range: 0.0,
340        speech_rate: 0.0,
341        loudness: 0.0,
342        energy: 0.0,
343        jitter: 0.0,
344        shimmer: 0.0,
345        pause_ratio: 0.0,
346    };
347
348    for (emotion, weight) in emotions {
349        let w = weight.max(0.0) / total_weight;
350        let f = generate_prosody_for_emotion(emotion, 0.7);
351        result.pitch_hz += f.pitch_hz * w;
352        result.pitch_range += f.pitch_range * w;
353        result.speech_rate += f.speech_rate * w;
354        result.loudness += f.loudness * w;
355        result.energy += f.energy * w;
356        result.jitter += f.jitter * w;
357        result.shimmer += f.shimmer * w;
358        result.pause_ratio += f.pause_ratio * w;
359    }
360
361    result
362}
363
364/// Cosine-like similarity between two prosody feature vectors, normalized to 0..1.
365#[allow(dead_code)]
366pub fn prosody_similarity(a: &ProsodyFeatures, b: &ProsodyFeatures) -> f32 {
367    // Normalize features to comparable scales then compute dot product similarity
368    let normalize = |f: &ProsodyFeatures| {
369        [
370            f.pitch_hz / 300.0,
371            f.pitch_range / 200.0,
372            f.speech_rate / 8.0,
373            f.loudness,
374            f.energy,
375            f.jitter * 10.0,
376            f.shimmer * 10.0,
377            f.pause_ratio,
378        ]
379    };
380
381    let na = normalize(a);
382    let nb = normalize(b);
383
384    let dot: f32 = na.iter().zip(nb.iter()).map(|(x, y)| x * y).sum();
385    let mag_a: f32 = na.iter().map(|x| x * x).sum::<f32>().sqrt();
386    let mag_b: f32 = nb.iter().map(|x| x * x).sum::<f32>().sqrt();
387
388    if mag_a < 1e-6 || mag_b < 1e-6 {
389        return 0.0;
390    }
391
392    (dot / (mag_a * mag_b)).clamp(0.0, 1.0)
393}
394
395/// Clamp all prosody fields to valid physical ranges.
396#[allow(dead_code)]
397pub fn normalize_prosody(features: &mut ProsodyFeatures) {
398    features.pitch_hz = features.pitch_hz.clamp(50.0, 600.0);
399    features.pitch_range = features.pitch_range.clamp(0.0, 300.0);
400    features.speech_rate = features.speech_rate.clamp(0.1, 10.0);
401    features.loudness = features.loudness.clamp(0.0, 1.0);
402    features.energy = features.energy.clamp(0.0, 1.0);
403    features.jitter = features.jitter.clamp(0.0, 1.0);
404    features.shimmer = features.shimmer.clamp(0.0, 1.0);
405    features.pause_ratio = features.pause_ratio.clamp(0.0, 1.0);
406}
407
408/// Serialize prosody features to a JSON string.
409#[allow(dead_code)]
410pub fn prosody_to_json(features: &ProsodyFeatures) -> String {
411    format!(
412        r#"{{"pitch_hz":{:.4},"pitch_range":{:.4},"speech_rate":{:.4},"loudness":{:.4},"energy":{:.4},"jitter":{:.4},"shimmer":{:.4},"pause_ratio":{:.4}}}"#,
413        features.pitch_hz,
414        features.pitch_range,
415        features.speech_rate,
416        features.loudness,
417        features.energy,
418        features.jitter,
419        features.shimmer,
420        features.pause_ratio,
421    )
422}
423
424/// Return the profile with highest intensity from a slice.
425#[allow(dead_code)]
426pub fn dominant_prosody_emotion(profiles: &[ProsodyProfile]) -> Option<&ProsodyProfile> {
427    profiles.iter().max_by(|a, b| {
428        a.intensity
429            .partial_cmp(&b.intensity)
430            .unwrap_or(std::cmp::Ordering::Equal)
431    })
432}
433
/// Categorize speech rate into descriptive labels.
///
/// Below 2 is `"slow"`, below 4 `"normal"`, below 6 `"fast"`; everything else
/// (including NaN, which fails every comparison) is `"very_fast"`.
#[allow(dead_code)]
pub fn speech_rate_category(rate: f32) -> &'static str {
    match rate {
        r if r < 2.0 => "slow",
        r if r < 4.0 => "normal",
        r if r < 6.0 => "fast",
        _ => "very_fast",
    }
}
447
448/// Estimate (arousal, valence) in the 2D emotion circumplex model.
449/// Both values are in -1..1 range.
450#[allow(dead_code)]
451pub fn estimate_arousal_valence(features: &ProsodyFeatures) -> (f32, f32) {
452    // Arousal: driven by energy, speech_rate, loudness
453    let arousal =
454        (features.energy * 0.4 + features.speech_rate / 10.0 * 0.3 + features.loudness * 0.3) * 2.0
455            - 1.0;
456
457    // Valence: driven by pitch (higher = more positive), low jitter = more positive
458    let valence = ((features.pitch_hz - 100.0) / 300.0 * 0.5
459        + (1.0 - features.jitter * 10.0).clamp(0.0, 1.0) * 0.3
460        + (1.0 - features.pause_ratio) * 0.2)
461        * 2.0
462        - 1.0;
463
464    (arousal.clamp(-1.0, 1.0), valence.clamp(-1.0, 1.0))
465}
466
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a feature set from values given in struct field order:
    /// pitch_hz, pitch_range, speech_rate, loudness, energy, jitter, shimmer, pause_ratio.
    fn features_from(values: [f32; 8]) -> ProsodyFeatures {
        let [pitch_hz, pitch_range, speech_rate, loudness, energy, jitter, shimmer, pause_ratio] =
            values;
        ProsodyFeatures {
            pitch_hz,
            pitch_range,
            speech_rate,
            loudness,
            energy,
            jitter,
            shimmer,
            pause_ratio,
        }
    }

    fn neutral_features() -> ProsodyFeatures {
        features_from([160.0, 40.0, 3.5, 0.5, 0.5, 0.01, 0.01, 0.25])
    }

    fn happy_features() -> ProsodyFeatures {
        features_from([230.0, 110.0, 5.5, 0.85, 0.8, 0.02, 0.02, 0.1])
    }

    fn sad_features() -> ProsodyFeatures {
        features_from([120.0, 20.0, 1.8, 0.25, 0.2, 0.08, 0.07, 0.55])
    }

    // --- classification -------------------------------------------------

    #[test]
    fn test_classify_happy() {
        let label = classify_prosody_emotion(&happy_features()).emotion;
        assert_eq!(label, ProsodyEmotion::Happy);
    }

    #[test]
    fn test_classify_sad() {
        let label = classify_prosody_emotion(&sad_features()).emotion;
        assert_eq!(label, ProsodyEmotion::Sad);
    }

    #[test]
    fn test_classify_neutral() {
        let label = classify_prosody_emotion(&neutral_features()).emotion;
        assert_eq!(label, ProsodyEmotion::Neutral);
    }

    // --- generation -----------------------------------------------------

    #[test]
    fn test_generate_happy_pitch_increases() {
        let pitch = generate_prosody_for_emotion(&ProsodyEmotion::Happy, 1.0).pitch_hz;
        assert!(pitch > 160.0);
    }

    #[test]
    fn test_generate_sad_pitch_decreases() {
        let pitch = generate_prosody_for_emotion(&ProsodyEmotion::Sad, 1.0).pitch_hz;
        assert!(pitch < 160.0);
    }

    // --- face parameters ------------------------------------------------

    #[test]
    fn test_prosody_to_face_params_keys() {
        let map = prosody_to_face_params(&neutral_features());
        for key in ["jaw_open", "brow_raise", "lip_corner_up"].iter() {
            assert!(map.contains_key(*key));
        }
    }

    #[test]
    fn test_prosody_to_face_params_range() {
        let map = prosody_to_face_params(&happy_features());
        for v in map.values() {
            assert!((0.0..=1.0).contains(v), "param out of range: {v}");
        }
    }

    // --- interpolation and blending ---------------------------------------

    #[test]
    fn test_interpolate_midpoint() {
        let pitch = interpolate_prosody(&neutral_features(), &happy_features(), 0.5).pitch_hz;
        assert!(pitch > 160.0);
        assert!(pitch < 230.0);
    }

    #[test]
    fn test_interpolate_t0_equals_a() {
        let start = neutral_features();
        let at_zero = interpolate_prosody(&start, &happy_features(), 0.0);
        let drift = (at_zero.pitch_hz - start.pitch_hz).abs();
        assert!(drift < 1e-4);
    }

    #[test]
    fn test_blend_single_emotion() {
        let reference = generate_prosody_for_emotion(&ProsodyEmotion::Happy, 0.7);
        let blended = blend_prosody_emotions(&[(ProsodyEmotion::Happy, 1.0)]);
        let drift = (blended.pitch_hz - reference.pitch_hz).abs();
        assert!(drift < 1e-3);
    }

    #[test]
    fn test_blend_empty_returns_neutral() {
        let rate = blend_prosody_emotions(&[]).speech_rate;
        assert!((rate - 3.5).abs() < 0.5);
    }

    // --- similarity -----------------------------------------------------

    #[test]
    fn test_prosody_similarity_self() {
        let f = neutral_features();
        let sim = prosody_similarity(&f, &f);
        let error = (sim - 1.0).abs();
        assert!(error < 1e-4, "self-similarity should be 1.0, got {sim}");
    }

    #[test]
    fn test_prosody_similarity_different() {
        let (happy, sad) = (happy_features(), sad_features());
        assert!(prosody_similarity(&happy, &sad) < 1.0);
    }

    // --- normalisation and serialisation ----------------------------------

    #[test]
    fn test_normalize_prosody_clamps() {
        let mut f = features_from([-100.0, 9999.0, -5.0, 2.0, -0.5, 5.0, 5.0, 3.0]);
        normalize_prosody(&mut f);
        assert!(f.pitch_hz >= 50.0);
        assert!(f.loudness <= 1.0);
        assert!(f.jitter <= 1.0);
    }

    #[test]
    fn test_prosody_to_json_contains_fields() {
        let json = prosody_to_json(&neutral_features());
        for field in ["pitch_hz", "speech_rate"].iter() {
            assert!(json.contains(*field));
        }
    }

    // --- ranking, labels, circumplex --------------------------------------

    #[test]
    fn test_dominant_prosody_emotion() {
        let weak_happy = ProsodyProfile {
            emotion: ProsodyEmotion::Happy,
            intensity: 0.3,
            features: happy_features(),
        };
        let strong_sad = ProsodyProfile {
            emotion: ProsodyEmotion::Sad,
            intensity: 0.8,
            features: sad_features(),
        };
        let profiles = vec![weak_happy, strong_sad];
        let dom = dominant_prosody_emotion(&profiles).expect("should succeed");
        assert_eq!(dom.emotion, ProsodyEmotion::Sad);
    }

    #[test]
    fn test_speech_rate_category() {
        let cases = [
            (1.0, "slow"),
            (3.0, "normal"),
            (5.0, "fast"),
            (7.0, "very_fast"),
        ];
        for (rate, label) in cases.iter() {
            assert_eq!(speech_rate_category(*rate), *label);
        }
    }

    #[test]
    fn test_estimate_arousal_valence_range() {
        let (arousal, valence) = estimate_arousal_valence(&neutral_features());
        assert!(arousal >= -1.0 && arousal <= 1.0);
        assert!(valence >= -1.0 && valence <= 1.0);
    }

    #[test]
    fn test_arousal_higher_for_angry() {
        let arousal_of = |emotion: ProsodyEmotion| {
            estimate_arousal_valence(&generate_prosody_for_emotion(&emotion, 1.0)).0
        };
        let a_angry = arousal_of(ProsodyEmotion::Angry);
        let a_calm = arousal_of(ProsodyEmotion::Calm);
        assert!(
            a_angry > a_calm,
            "angry arousal {a_angry} should exceed calm {a_calm}"
        );
    }
}
oxihuman_morph/speech_prosody.rs

oxihuman_morph/
speech_prosody.rs