1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
//! Voice profiles defining speaker characteristics.
//!
//! A `VoiceProfile` captures the acoustic parameters that distinguish one
//! speaker from another: fundamental frequency, formant scaling, breathiness,
//! vibrato, and micro-perturbations (jitter/shimmer).
use serde::{Deserialize, Serialize};
use crate::error::Result;
use crate::formant::VowelTarget;
use crate::glottal::GlottalSource;
/// A speaker's voice characteristics.
///
/// Use the preset constructors (`new_male`, `new_female`, `new_child`) or the
/// builder methods to create custom voices.
///
/// All fields are public and the type derives `Deserialize`, so values that
/// are assigned directly or read from serialized data do not pass through the
/// range limits applied by the `with_*` builder methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoiceProfile {
/// Base fundamental frequency in Hz.
///
/// `with_f0` stores the value as given; it is handed to `GlottalSource::new`
/// when `create_glottal_source` is called. It also drives the bandwidth
/// scaling in `apply_formant_scale`, relative to a 120 Hz reference.
pub base_f0: f32,
/// F0 range (maximum deviation from base_f0) in Hz.
///
/// `with_f0_range` floors this at 0.0.
pub f0_range: f32,
/// Formant frequency scaling factor (1.0 = adult male reference).
///
/// `with_formant_scale` floors this at 0.1.
pub formant_scale: f32,
/// Breathiness amount (0.0 = clear, 1.0 = very breathy).
///
/// `with_breathiness` limits this to `0.0..=1.0`.
pub breathiness: f32,
/// Vibrato rate in Hz (typically ~5 Hz).
///
/// `with_vibrato_rate` floors this at 0.0.
pub vibrato_rate: f32,
/// Vibrato depth as fraction of f0 (typically ~0.05 = 5%).
///
/// `with_vibrato_depth` limits this to `0.0..=0.5`.
pub vibrato_depth: f32,
/// Jitter: cycle-to-cycle f0 perturbation (fraction, typically 0.01-0.02).
///
/// `with_jitter` limits this to `0.0..=0.05`.
pub jitter: f32,
/// Shimmer: cycle-to-cycle amplitude perturbation (fraction, typically 0.02-0.04).
///
/// `with_shimmer` limits this to `0.0..=0.1`.
pub shimmer: f32,
}
impl VoiceProfile {
    /// Creates a typical adult male voice profile.
    ///
    /// f0 = 120 Hz, formant_scale = 1.0 (reference).
    #[must_use]
    pub fn new_male() -> Self {
        Self {
            base_f0: 120.0,
            f0_range: 40.0,
            formant_scale: 1.0,
            breathiness: 0.02,
            vibrato_rate: 5.0,
            vibrato_depth: 0.04,
            jitter: 0.01,
            shimmer: 0.02,
        }
    }

    /// Creates a typical adult female voice profile.
    ///
    /// f0 = 220 Hz, formant_scale = 1.17 (shorter vocal tract).
    #[must_use]
    pub fn new_female() -> Self {
        Self {
            base_f0: 220.0,
            f0_range: 50.0,
            formant_scale: 1.17,
            breathiness: 0.05,
            vibrato_rate: 5.5,
            vibrato_depth: 0.05,
            jitter: 0.008,
            shimmer: 0.018,
        }
    }

    /// Creates a typical child voice profile.
    ///
    /// f0 = 300 Hz, formant_scale = 1.3 (even shorter vocal tract).
    #[must_use]
    pub fn new_child() -> Self {
        Self {
            base_f0: 300.0,
            f0_range: 60.0,
            formant_scale: 1.3,
            breathiness: 0.03,
            vibrato_rate: 6.0,
            vibrato_depth: 0.03,
            jitter: 0.012,
            shimmer: 0.025,
        }
    }

    /// Sets the base fundamental frequency in Hz (builder pattern).
    ///
    /// The value is stored as given. It is passed to `GlottalSource::new`,
    /// which is fallible, when [`Self::create_glottal_source`] is called.
    #[must_use]
    pub fn with_f0(mut self, f0: f32) -> Self {
        self.base_f0 = f0;
        self
    }

    /// Sets the breathiness amount (builder pattern).
    ///
    /// The value is limited to `0.0..=1.0`; NaN is stored as `0.0`.
    #[must_use]
    pub fn with_breathiness(mut self, b: f32) -> Self {
        self.breathiness = Self::clamp_or_min(b, 0.0, 1.0);
        self
    }

    /// Sets the vibrato rate in Hz (builder pattern).
    ///
    /// Negative values are raised to `0.0`. `f32::max` ignores a NaN operand,
    /// so NaN is stored as `0.0` as well.
    #[must_use]
    pub fn with_vibrato_rate(mut self, rate: f32) -> Self {
        self.vibrato_rate = rate.max(0.0);
        self
    }

    /// Sets the vibrato depth as fraction of f0 (builder pattern).
    ///
    /// The value is limited to `0.0..=0.5`; NaN is stored as `0.0`.
    #[must_use]
    pub fn with_vibrato_depth(mut self, depth: f32) -> Self {
        self.vibrato_depth = Self::clamp_or_min(depth, 0.0, 0.5);
        self
    }

    /// Sets the jitter amount (builder pattern).
    ///
    /// The value is limited to `0.0..=0.05`; NaN is stored as `0.0`.
    #[must_use]
    pub fn with_jitter(mut self, j: f32) -> Self {
        self.jitter = Self::clamp_or_min(j, 0.0, 0.05);
        self
    }

    /// Sets the shimmer amount (builder pattern).
    ///
    /// The value is limited to `0.0..=0.1`; NaN is stored as `0.0`.
    #[must_use]
    pub fn with_shimmer(mut self, s: f32) -> Self {
        self.shimmer = Self::clamp_or_min(s, 0.0, 0.1);
        self
    }

    /// Sets the formant scaling factor (builder pattern).
    ///
    /// Values below `0.1` are raised to `0.1`. `f32::max` ignores a NaN
    /// operand, so NaN is stored as `0.1` as well.
    #[must_use]
    pub fn with_formant_scale(mut self, scale: f32) -> Self {
        self.formant_scale = scale.max(0.1);
        self
    }

    /// Sets the f0 range in Hz (builder pattern).
    ///
    /// Negative values are raised to `0.0`. `f32::max` ignores a NaN operand,
    /// so NaN is stored as `0.0` as well.
    #[must_use]
    pub fn with_f0_range(mut self, range: f32) -> Self {
        self.f0_range = range.max(0.0);
        self
    }

    /// Creates a [`GlottalSource`] configured with this voice profile's parameters.
    ///
    /// The source is constructed from `base_f0` and `sample_rate`, then
    /// breathiness, jitter, shimmer, and vibrato (rate and depth) are copied
    /// over from the profile.
    ///
    /// # Errors
    ///
    /// Propagates the error from `GlottalSource::new`, e.g. when `base_f0` is
    /// outside the range it accepts.
    pub fn create_glottal_source(&self, sample_rate: f32) -> Result<GlottalSource> {
        let mut gs = GlottalSource::new(self.base_f0, sample_rate)?;
        gs.set_breathiness(self.breathiness);
        gs.set_jitter(self.jitter);
        gs.set_shimmer(self.shimmer);
        gs.set_vibrato(self.vibrato_rate, self.vibrato_depth);
        Ok(gs)
    }

    /// Applies formant frequency and bandwidth scaling to a vowel target.
    ///
    /// Frequencies are scaled by `formant_scale` (modeling vocal tract length).
    /// Bandwidths are scaled by `sqrt(base_f0 / 120.0)` — higher f0 voices
    /// (female, child) have wider bandwidths due to increased source-tract coupling.
    ///
    /// The input target is not modified; a new [`VowelTarget`] is returned.
    #[must_use]
    pub fn apply_formant_scale(&self, target: &VowelTarget) -> VowelTarget {
        // Bandwidth scaling: sqrt(f0 / male_reference_f0).
        // NOTE(review): `base_f0` is not validated here; presumably a
        // non-positive value makes this scale zero or NaN — confirm against
        // `crate::math::f32::sqrt`.
        let bw_scale = crate::math::f32::sqrt(self.base_f0 / 120.0);
        let freq_scale = self.formant_scale;

        VowelTarget::with_bandwidths(
            [
                target.f1 * freq_scale,
                target.f2 * freq_scale,
                target.f3 * freq_scale,
                target.f4 * freq_scale,
                target.f5 * freq_scale,
            ],
            [
                target.b1 * bw_scale,
                target.b2 * bw_scale,
                target.b3 * bw_scale,
                target.b4 * bw_scale,
                target.b5 * bw_scale,
            ],
        )
    }

    /// Clamps `value` into `[min, max]`, mapping NaN to `min`.
    ///
    /// `f32::clamp` returns NaN for a NaN input, whereas the `f32::max`-based
    /// setters in this impl collapse NaN to their lower bound. Routing the
    /// bounded setters through this helper gives every setter the same NaN
    /// behaviour and keeps NaN out of fields documented as bounded.
    fn clamp_or_min(value: f32, min: f32, max: f32) -> f32 {
        if value.is_nan() {
            min
        } else {
            value.clamp(min, max)
        }
    }
}
impl Default for VoiceProfile {
fn default() -> Self {
Self::new_male()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `actual` is within `tol` (exclusive) of `expected`.
    fn assert_close(actual: f32, expected: f32, tol: f32) {
        assert!((actual - expected).abs() < tol);
    }

    /// Asserts equality up to `f32::EPSILON`, the tolerance used for values
    /// that are stored without further arithmetic.
    fn assert_same(actual: f32, expected: f32) {
        assert_close(actual, expected, f32::EPSILON);
    }

    // Each preset exposes its documented f0 and formant scale.

    #[test]
    fn test_male_preset() {
        let profile = VoiceProfile::new_male();
        assert_same(profile.base_f0, 120.0);
        assert_same(profile.formant_scale, 1.0);
    }

    #[test]
    fn test_female_preset() {
        let profile = VoiceProfile::new_female();
        assert_same(profile.base_f0, 220.0);
        assert_same(profile.formant_scale, 1.17);
    }

    #[test]
    fn test_child_preset() {
        let profile = VoiceProfile::new_child();
        assert_same(profile.base_f0, 300.0);
        assert_same(profile.formant_scale, 1.3);
    }

    // Chained builder calls each land in their own field.
    #[test]
    fn test_builder_pattern() {
        let profile = VoiceProfile::new_male()
            .with_f0(150.0)
            .with_breathiness(0.3)
            .with_vibrato_rate(6.0);
        assert_same(profile.base_f0, 150.0);
        assert_same(profile.breathiness, 0.3);
        assert_same(profile.vibrato_rate, 6.0);
    }

    // The female preset multiplies formant frequencies by 1.17.
    #[test]
    fn test_formant_scaling() {
        let profile = VoiceProfile::new_female();
        let target = crate::formant::VowelTarget::from_vowel(crate::formant::Vowel::A);
        let scaled = profile.apply_formant_scale(&target);
        assert_close(scaled.f1, target.f1 * 1.17, 0.01);
        assert_close(scaled.f2, target.f2 * 1.17, 0.01);
    }

    // Out-of-range breathiness is pulled back to the nearest bound.
    #[test]
    fn test_clamping() {
        let too_high = VoiceProfile::new_male().with_breathiness(5.0);
        assert_same(too_high.breathiness, 1.0);

        let too_low = VoiceProfile::new_male().with_breathiness(-1.0);
        assert_same(too_low.breathiness, 0.0);
    }

    // A profile survives a JSON round trip with its fields intact.
    #[test]
    fn test_serde_roundtrip() {
        let original = VoiceProfile::new_female().with_f0(210.0);
        let json = serde_json::to_string(&original).unwrap();
        let restored: VoiceProfile = serde_json::from_str(&json).unwrap();
        assert_same(restored.base_f0, 210.0);
        assert_same(restored.formant_scale, 1.17);
    }

    // `Default` yields the male preset's f0.
    #[test]
    fn test_default() {
        let profile = VoiceProfile::default();
        assert_same(profile.base_f0, 120.0);
    }
}