pub mod targets;
pub mod engine;
pub mod phondata;
pub mod bytecode;
pub mod wavegen;
pub mod setlengths;
pub mod sample;
use crate::error::{Error, Result};
/// One frame record made of fixed-width integer fields.
///
/// The declared field widths add up to 64 bytes
/// (2 + 14 + 1 + 1 + 8 + 6 + 3 + 4 + 5 + 5 + 7 + 7 + 1), which is the value
/// of [`Frame::C_SIZE`].
///
/// NOTE(review): the field names suggest formant and Klatt synthesis
/// parameters; confirm their exact meaning against the data format this
/// struct mirrors.
#[derive(Clone, Debug, Default)]
pub struct Frame {
    pub flags: u16,
    pub ffreq: [i16; 7],
    pub length: u8,
    pub rms: u8,
    pub fheight: [u8; 8],
    pub fwidth: [u8; 6],
    pub fright: [u8; 3],
    pub bw: [u8; 4],
    pub klattp: [u8; 5],
    pub klattp2: [u8; 5],
    pub klatt_ap: [u8; 7],
    pub klatt_bp: [u8; 7],
    pub spare: u8,
}

impl Frame {
    /// Expected byte size of one frame record; equals the sum of the field
    /// widths declared on [`Frame`].
    ///
    /// NOTE(review): presumably the size of the matching C-layout record —
    /// confirm.
    pub const C_SIZE: usize = 64;
}
/// Second-order recursive filter section.
///
/// Each call to [`Resonator::tick`] evaluates
/// `y = a * input + b * x1 + c * x2`, where `x1` and `x2` hold the two most
/// recent outputs.
#[derive(Clone, Debug, Default)]
pub struct Resonator {
    /// Gain applied to the incoming sample.
    pub a: f64,
    /// Weight of the previous output (`x1`).
    pub b: f64,
    /// Weight of the output before that (`x2`).
    pub c: f64,
    /// Most recent output.
    pub x1: f64,
    /// Output produced one step before `x1`.
    pub x2: f64,
}

impl Resonator {
    /// Feeds one sample through the filter, shifts the output history and
    /// returns the new output.
    #[inline]
    pub fn tick(&mut self, input: f64) -> f64 {
        let output = self.a * input + self.b * self.x1 + self.c * self.x2;
        // The new output becomes `x1`; the value it displaces moves to `x2`.
        self.x2 = std::mem::replace(&mut self.x1, output);
        output
    }

    /// Clears the output history; the coefficients are left untouched.
    pub fn reset(&mut self) {
        self.x2 = 0.0;
        self.x1 = 0.0;
    }
}
/// Adjustable voice settings handed to the synthesis routines.
#[derive(Clone, Debug)]
pub struct VoiceParams {
    /// Speaking speed in percent (default 100). `Synthesizer::synthesize_codes`
    /// divides 100 by this value to obtain its duration factor.
    pub speed_percent: u32,
    /// Pitch in Hz, going by the field name (default 118).
    pub pitch_hz: u32,
    /// Pitch range (default 100). NOTE(review): unit not visible here —
    /// presumably percent; confirm.
    pub pitch_range: u32,
    /// Formant scaling (default 100). NOTE(review): presumably percent; confirm.
    pub formant_scale: u32,
    /// Sample rate reported by `Synthesizer::sample_rate` (default 22050).
    pub sample_rate: u32,
    /// Output amplitude (default 80). NOTE(review): scale not visible here.
    pub amplitude: u32,
}

impl Default for VoiceParams {
    /// Returns the stock voice: speed 100, pitch 118, pitch range 100,
    /// formant scale 100, sample rate 22050, amplitude 80.
    fn default() -> Self {
        Self {
            speed_percent: 100,
            pitch_hz: 118,
            pitch_range: 100,
            formant_scale: 100,
            sample_rate: 22050,
            amplitude: 80,
        }
    }
}
/// Buffer of signed 16-bit PCM samples, as returned by the synthesis entry points.
pub type PcmBuffer = Vec<i16>;
pub struct Synthesizer {
pub voice: VoiceParams,
}
impl Synthesizer {
pub fn new(voice: VoiceParams) -> Self {
Synthesizer { voice }
}
pub fn synthesize(&self, phonemes: &str) -> Result<PcmBuffer> {
if phonemes.is_empty() {
return Ok(Vec::new());
}
let segments = engine::parse_ipa(phonemes, &self.voice);
if segments.is_empty() {
return Err(Error::InvalidData(
format!("no recognisable phonemes in {:?}", phonemes)
));
}
let pcm = engine::synthesize_segments(&segments, &self.voice);
Ok(pcm)
}
pub fn synthesize_codes(
&self,
codes: &[crate::translate::PhonemeCode],
phdata: &crate::phoneme::PhonemeData,
) -> Result<PcmBuffer> {
if codes.is_empty() {
return Ok(Vec::new());
}
let speed_factor = 100.0 / self.voice.speed_percent.max(1) as f64;
let annotated = annotate_codes(codes, phdata);
let mut output_i16: Vec<i16> = Vec::new();
let mut wavephase: i32 = i32::MAX;
let sil_samples = |ms: f64| -> usize {
((ms / 1000.0) * 22050.0 * speed_factor) as usize
};
for ann in &annotated {
match ann {
AnnCode::Pause(ms) => {
let n = sil_samples(*ms);
output_i16.extend(std::iter::repeat(0i16).take(n));
}
AnnCode::WordBoundary => {
let n = sil_samples(50.0);
output_i16.extend(std::iter::repeat(0i16).take(n));
}
AnnCode::PrepauseSamples(n) => {
output_i16.extend(std::iter::repeat(0i16).take(*n));
}
AnnCode::Phoneme(info) => {
let samples = synthesize_phoneme_info(info, phdata, &self.voice,
speed_factor, &mut wavephase);
output_i16.extend_from_slice(&samples);
}
}
}
Ok(output_i16)
}
pub fn sample_rate(&self) -> u32 {
self.voice.sample_rate
}
}
/// Per-phoneme context gathered by `annotate_codes` and consumed by
/// `synthesize_phoneme_info`.
struct PhonemeInfo {
/// Phoneme code; used to look the phoneme up in the phoneme table.
code: u8,
/// The table entry's `typ` value (0 when the code has no table entry).
ph_type: u8,
/// Stress level derived from the stress marker that preceded this phoneme.
stress_level: u8,
/// True when a code-12 marker preceded this phoneme.
lengthen: bool,
/// `length_mod` table value of the following phoneme (0 when there is none).
next_lm: u8,
/// `length_mod` table value of the phoneme after the following one (0 when there is none).
next2_lm: u8,
/// True when a phoneme whose table type is 2 follows before the next
/// boundary entry, code 0 or code 15.
more_syllables: bool,
/// True when the look-ahead finds no following phoneme code (next code is 0).
end_of_clause: bool,
/// The table entry's `std_length` value (0 when the code has no table entry).
std_length: u8,
}
/// One step of the annotated phoneme stream produced by `annotate_codes`.
enum AnnCode {
/// Silence given in milliseconds; scaled by the speed factor when rendered.
Pause(f64),
/// Word boundary; rendered as 50 ms of speed-scaled silence.
WordBoundary,
/// Silence given directly as a sample count; inserted without speed scaling.
PrepauseSamples(usize),
/// A phoneme to synthesize, together with its context.
Phoneme(PhonemeInfo),
}
/// Converts the flat translator output into a list of synthesis steps.
///
/// Handling per entry, keyed on `(code, is_boundary)`:
///
/// * `(0, true)`   — emits a 200 ms pause and clears the pending stress.
/// * `(0, false)`  — ignored.
/// * `(1..=7, false)` — remembered as the stress marker for the next phoneme.
/// * `(9, _)`      — emits an 80 ms pause.
/// * `(12, _)`     — marks the next phoneme as lengthened.
/// * `(15, true)`  — emits a word boundary and clears the pending stress.
/// * anything else — treated as a phoneme: its table data and look-ahead
///   context are collected into a `PhonemeInfo`, preceded by a
///   `PrepauseSamples` step when `compute_prepause` asks for one.
///
/// Codes with no entry in `phdata` contribute zeros for type, length
/// modifier and standard length.
fn annotate_codes(
    codes: &[crate::translate::PhonemeCode],
    phdata: &crate::phoneme::PhonemeData,
) -> Vec<AnnCode> {
    // (type, length_mod, std_length) of a code; zeros when the table has no entry.
    let table_entry = |c: u8| -> (u8, u8, u8) {
        match phdata.get(c) {
            Some(ph) => (ph.typ, ph.length_mod, ph.std_length),
            None => (0, 0, 0),
        }
    };
    // Entries the look-ahead steps over: boundary entries, codes 9 and 12,
    // and the stress codes 1..=7.
    let is_marker = |c: u8, boundary: bool| -> bool {
        boundary || c == 9 || c == 12 || (1..=7).contains(&c)
    };

    let stream: Vec<(u8, bool)> = codes.iter().map(|pc| (pc.code, pc.is_boundary)).collect();

    let mut annotated = Vec::new();
    let mut stress_code: u8 = 0;
    let mut lengthen_next = false;

    for (pos, &(code, is_boundary)) in stream.iter().enumerate() {
        match (code, is_boundary) {
            (0, true) => {
                annotated.push(AnnCode::Pause(200.0));
                stress_code = 0;
            }
            (0, false) => {}
            (1..=7, false) => stress_code = code,
            (9, _) => annotated.push(AnnCode::Pause(80.0)),
            (12, _) => lengthen_next = true,
            (15, true) => {
                annotated.push(AnnCode::WordBoundary);
                stress_code = 0;
            }
            _ => {
                // Everything after the current entry.
                let rest = &stream[pos + 1..];
                let (ph_type, ph_lm, std_length) = table_entry(code);

                // The next two non-marker codes, 0 where the stream runs out.
                let mut ahead = rest
                    .iter()
                    .filter(|&&(c, b)| !is_marker(c, b))
                    .map(|&(c, _)| c);
                let next_code = ahead.next().unwrap_or(0);
                let next2_code = ahead.next().unwrap_or(0);

                let (next_type, next_lm, _) = table_entry(next_code);
                let (next2_type, next2_lm, _) = table_entry(next2_code);

                // NOTE(review): an earlier form also tested
                // `next_code == 15 && <next entry is a boundary>`, but the
                // look-ahead skips every boundary entry, so that case could
                // never hold. Confirm whether a word/clause boundary was
                // meant to count here.
                let end_of_clause = next_code == 0;

                // Does a type-2 phoneme follow before the next boundary
                // entry, code 0 or code 15? Stress codes are stepped over.
                let more_syllables = rest
                    .iter()
                    .take_while(|&&(c, b)| !b && c != 0 && c != 15)
                    .filter(|&&(c, _)| !(1..=7).contains(&c))
                    .any(|&(c, _)| match phdata.get(c) {
                        Some(ph) => ph.typ == 2,
                        None => false,
                    });

                let prepause_samples = compute_prepause(
                    ph_type,
                    next_type,
                    next2_type,
                    ph_lm,
                    code,
                    &mut annotated,
                );

                // Both pending markers are consumed by this phoneme.
                let stress_level =
                    setlengths::stress_code_to_level(std::mem::take(&mut stress_code));
                let lengthen = std::mem::take(&mut lengthen_next);

                if prepause_samples > 0 {
                    annotated.push(AnnCode::PrepauseSamples(prepause_samples));
                }
                annotated.push(AnnCode::Phoneme(PhonemeInfo {
                    code,
                    ph_type,
                    stress_level,
                    lengthen,
                    next_lm,
                    next2_lm,
                    more_syllables,
                    end_of_clause,
                    std_length,
                }));
            }
        }
    }

    annotated
}
/// Returns the number of silent samples to insert before a phoneme.
///
/// Only `ph_type` is consulted: type 4 gets 48 ms of silence at a fixed
/// 22050 Hz rate (1058 samples); every other type gets none. The remaining
/// parameters are accepted but unused.
///
/// NOTE(review): type 4 is presumably the stop-consonant type — confirm
/// against the phoneme table definition.
fn compute_prepause(
    ph_type: u8,
    _next_type: u8,
    _next2_type: u8,
    _ph_lm: u8,
    _code: u8,
    _result: &mut Vec<AnnCode>,
) -> usize {
    const SAMPLES_PER_SECOND: f64 = 22050.0;

    let prepause_ms: f64 = if ph_type == 4 { 48.0 } else { 0.0 };
    if prepause_ms <= 0.0 {
        return 0;
    }
    (prepause_ms / 1000.0 * SAMPLES_PER_SECOND) as usize
}
/// Renders one annotated phoneme to 16-bit PCM.
///
/// Steps, in order:
///
/// 1. Look the phoneme up in `phdata` and scan its program for a formant
///    sequence address and a wave-sample address. If neither is found and the
///    program redirects to another phoneme, that phoneme's addresses are used.
/// 2. For phoneme types 4 and 6, return the parsed wave sample, or 50 ms of
///    speed-scaled silence when no sample can be read.
/// 3. Otherwise parse the formant sequence and adjust its frame lengths:
///    type 2 is scaled towards the length computed by
///    `calc_vowel_length_mod`; other types are scaled by `speed_factor`.
/// 4. When `info.lengthen` is set and there is more than one frame, duplicate
///    the middle frame.
/// 5. Synthesize the frames with a stress-dependent amplitude factor and pass
///    the result through `agc_clip`.
///
/// Returns an empty vector when the phoneme is unknown, has no formant data,
/// or its sequence cannot be parsed or is empty.
///
/// `wavephase` is handed to `wavegen::synthesize_frames` so that state can
/// carry over between consecutive phonemes.
///
/// NOTE(review): types 2, 4 and 6 are presumably vowel, stop and fricative —
/// confirm against the phoneme table definition.
fn synthesize_phoneme_info(
    info: &PhonemeInfo,
    phdata: &crate::phoneme::PhonemeData,
    voice: &VoiceParams,
    speed_factor: f64,
    wavephase: &mut i32,
) -> Vec<i16> {
    use setlengths::{calc_vowel_length_mod, length_mod_to_samples};
    const SAMPLERATE: u32 = 22050;

    let ph_tab = match phdata.get(info.code) {
        Some(p) => p,
        None => return Vec::new(),
    };

    let mut extract = bytecode::scan_phoneme(ph_tab.program, &phdata.phonindex);
    if extract.fmt_addr.is_none() && extract.wav_addr.is_none() {
        // Nothing usable in this program: follow a redirect to another phoneme.
        if let Some(target_code) = extract.change_phoneme_code {
            if let Some(target_ph) = phdata.get(target_code) {
                if target_ph.program > 0 {
                    let sub = bytecode::scan_phoneme(target_ph.program, &phdata.phonindex);
                    // Both addresses are known to be `None` here, so the
                    // redirect target's values are taken unconditionally.
                    extract.fmt_addr = sub.fmt_addr;
                    extract.fmt_param = sub.fmt_param;
                    extract.wav_addr = sub.wav_addr;
                    extract.wav_param = sub.wav_param;
                }
            }
        }
    }

    if info.ph_type == 4 || info.ph_type == 6 {
        if let Some(wav_addr) = extract.wav_addr {
            if let Some(pcm) = sample::parse_wav_sample(
                wav_addr,
                &phdata.phondata,
                speed_factor,
                0,
            ) {
                return pcm;
            }
        }
        // No readable sample: fall back to 50 ms of silence.
        let n = (50.0 / 1000.0 * SAMPLERATE as f64 * speed_factor) as usize;
        return vec![0i16; n];
    }

    let fmt_addr = match extract.fmt_addr {
        Some(a) => a as usize,
        None => return Vec::new(),
    };
    let mut seq = match phondata::SpectSeq::parse(&phdata.phondata, fmt_addr) {
        Some(s) => s,
        None => return Vec::new(),
    };
    if seq.frames.is_empty() {
        return Vec::new();
    }

    if info.ph_type == 2 {
        let length_mod = calc_vowel_length_mod(
            info.stress_level,
            info.next_lm,
            info.next2_lm,
            info.more_syllables,
            info.end_of_clause,
            info.std_length,
        );
        let length_mod = if info.lengthen { length_mod * 4 / 3 } else { length_mod };
        let target_samples = length_mod_to_samples(length_mod, SAMPLERATE, speed_factor);
        if target_samples > 0 {
            let n = seq.frames.len();
            if n > 1 {
                // The last frame is excluded from both the sum and the scaling.
                let raw_sum: usize = seq.frames[..n - 1]
                    .iter()
                    .map(|f| f.length as usize)
                    .sum::<usize>()
                    .max(1);
                let scaled_sum = (raw_sum as f64 * 64.0 * speed_factor) as usize;
                if scaled_sum > 0 {
                    // Scale factor in 1/256 units.
                    let scale256 = target_samples * 256 / scaled_sum.max(1);
                    for fr in &mut seq.frames[..n - 1] {
                        // Clamp while still in `usize`: casting to `u8` first
                        // would wrap any value above 255 (e.g. 300 -> 44)
                        // instead of saturating.
                        let scaled = fr.length as usize * scale256 / 256;
                        fr.length = scaled.clamp(1, 255) as u8;
                    }
                }
            }
        }
    } else if (speed_factor - 1.0).abs() > 0.01 {
        for fr in &mut seq.frames {
            let new_len = ((fr.length as f64 * speed_factor).round() as usize).max(1);
            fr.length = new_len.min(255) as u8;
        }
    }

    if info.lengthen && seq.frames.len() > 1 {
        let mid = seq.frames.len() / 2;
        let extra = seq.frames[mid].clone();
        seq.frames.insert(mid, extra);
    }

    // Amplitude relative to a stress amplitude of 22 (factor 1.0 at that
    // value); unknown stress levels fall back to 20.
    let stress_amp = setlengths::STRESS_AMPS_EN
        .get(info.stress_level as usize)
        .copied()
        .unwrap_or(20) as f64;
    let general_amp = 55.0f64;
    let wdata_amplitude = stress_amp * general_amp / 16.0;
    let amp_primary = 22.0 * 55.0 / 16.0;
    let amp_factor = wdata_amplitude / amp_primary;

    let raw = wavegen::synthesize_frames(&seq, voice, amp_factor, wavephase);
    agc_clip(&raw)
}
/// Converts 32-bit samples to 16-bit with a simple automatic gain control.
///
/// The gain is kept in 1/256 units and starts at 256 (unity). When a scaled
/// sample would reach ±32768, the gain is lowered to at most
/// `8_388_608 / |sample| - 1` (never below 1) and the sample is rescaled;
/// 8_388_608 is 32768 * 256. After every sample the gain creeps back towards
/// unity by one step. Output is clamped to `-32767..=32767`.
///
/// An empty input yields an empty vector.
fn agc_clip(samples: &[i32]) -> Vec<i16> {
    const UNITY_GAIN: i64 = 256;
    const LIMIT: i64 = 32768;

    let mut gain = UNITY_GAIN;
    samples
        .iter()
        .map(|&sample| {
            let raw = i64::from(sample);
            let mut scaled = (raw * gain) >> 8;

            if scaled >= LIMIT || scaled <= -LIMIT {
                let ceiling = if raw != 0 { 8_388_608i64 / raw.abs() - 1 } else { 0 };
                if ceiling < gain {
                    gain = ceiling.max(1);
                }
                scaled = (raw * gain) >> 8;
            }

            // Recover one gain step per sample until unity is reached again.
            if gain < UNITY_GAIN {
                gain += 1;
            }
            scaled.clamp(-32767, 32767) as i16
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a synthesizer that uses the default voice.
    fn default_synth() -> Synthesizer {
        Synthesizer::new(VoiceParams::default())
    }

    /// Reports whether `actual` lies within 1e-12 of `expected`.
    fn nearly(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < 1e-12
    }

    // With `b` and `c` at zero the output is just the input.
    #[test]
    fn resonator_tick_accumulates() {
        let mut filter = Resonator { a: 1.0, b: 0.0, c: 0.0, x1: 0.0, x2: 0.0 };
        let first = filter.tick(1.0);
        assert!(nearly(first, 1.0));
        let second = filter.tick(2.0);
        assert!(nearly(second, 2.0));
    }

    // With only `b` set, each output is half of the previous one.
    #[test]
    fn resonator_tick_with_feedback() {
        let mut filter = Resonator { a: 0.0, b: 0.5, c: 0.0, x1: 1.0, x2: 0.0 };
        assert!(nearly(filter.tick(0.0), 0.5));
        assert!(nearly(filter.tick(0.0), 0.25));
    }

    #[test]
    fn resonator_reset_clears_state() {
        let mut filter = Resonator { a: 1.0, b: 0.5, c: 0.0, x1: 99.0, x2: 99.0 };
        filter.reset();
        assert_eq!(filter.x1, 0.0);
        assert_eq!(filter.x2, 0.0);
    }

    #[test]
    fn frame_c_size() {
        assert_eq!(
            Frame::C_SIZE,
            64,
            "Frame::C_SIZE must match the C struct frame_t"
        );
    }

    #[test]
    fn voice_params_default_sample_rate() {
        assert_eq!(VoiceParams::default().sample_rate, 22050);
    }

    #[test]
    fn synthesize_empty_string_returns_empty() {
        let pcm = default_synth().synthesize("").unwrap();
        assert!(pcm.is_empty());
    }

    #[test]
    fn synthesize_ipa_the() {
        let pcm = default_synth()
            .synthesize("ðə")
            .expect("should synthesise 'the'");
        assert!(!pcm.is_empty());
        // No sample may sit at the most negative 16-bit value.
        assert!(pcm.iter().all(|&sample| sample > i16::MIN));
    }

    #[test]
    fn synthesize_hello() {
        let pcm = default_synth()
            .synthesize("hɛloʊ")
            .expect("should synthesise 'hello'");
        assert!(!pcm.is_empty());
        assert!(pcm.len() > 5_000, "too short: {} samples", pcm.len());
    }

    #[test]
    fn synthesize_produces_nonzero_audio() {
        let pcm = default_synth().synthesize("iː").unwrap();
        let peak = pcm
            .iter()
            .fold(0u16, |highest, &sample| highest.max(sample.unsigned_abs()));
        assert!(peak > 1000, "expected non-trivial audio, got peak = {peak}");
    }

    #[test]
    fn synthesize_stress_words() {
        let pcm = default_synth().synthesize("ˈhɛloʊ ˌwɜːld").unwrap();
        assert!(!pcm.is_empty());
    }

    #[test]
    fn synthesize_unknown_phonemes_error() {
        let outcome = default_synth().synthesize("☺☻♥");
        assert!(outcome.is_err(), "expected error for all-unrecognised input");
    }

    #[test]
    fn sample_rate_is_22050() {
        assert_eq!(default_synth().sample_rate(), 22050);
    }

    #[test]
    fn synthesize_speed_affects_duration() {
        let fast_voice = VoiceParams {
            speed_percent: 200,
            ..VoiceParams::default()
        };
        let pcm_normal = default_synth().synthesize("hɛloʊ").unwrap();
        let pcm_fast = Synthesizer::new(fast_voice).synthesize("hɛloʊ").unwrap();
        assert!(
            pcm_fast.len() < pcm_normal.len(),
            "fast speech must be shorter: fast={}, normal={}",
            pcm_fast.len(),
            pcm_normal.len()
        );
    }
}