use alloc::{format, string::ToString, vec, vec::Vec};
use serde::{Deserialize, Serialize};
use tracing::trace;
use crate::error::{Result, SvaraError};
use crate::formant::{Formant, FormantFilter, Vowel, VowelTarget};
use crate::tract::{NasalPlace, VocalTract};
use crate::voice::VoiceProfile;
/// Every speech sound (plus silence) that this module can describe.
///
/// [`Phoneme::class`] assigns each variant to a [`PhonemeClass`]; the group
/// comments below follow that mapping. The enum is `#[non_exhaustive]`, so
/// downstream crates must keep a wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Phoneme {
// Vowels (`PhonemeClass::Vowel`).
VowelA,
VowelE,
VowelI,
VowelO,
VowelU,
VowelSchwa,
VowelOpenO,
VowelAsh,
VowelNearI,
VowelNearU,
VowelOpenA,
VowelOpenE,
VowelCupV,
VowelBird,
VowelLongI,
// Diphthongs (`PhonemeClass::Diphthong`).
DiphthongAI,
DiphthongAU,
DiphthongOI,
DiphthongEI,
DiphthongOU,
// Plosives (`PhonemeClass::Plosive`).
PlosiveP,
PlosiveB,
PlosiveT,
PlosiveD,
PlosiveK,
PlosiveG,
// Fricatives (`PhonemeClass::Fricative`).
FricativeF,
FricativeV,
FricativeS,
FricativeZ,
FricativeSh,
FricativeZh,
FricativeTh,
FricativeDh,
FricativeH,
// Nasals (`PhonemeClass::Nasal`).
NasalM,
NasalN,
NasalNg,
// Affricates (`PhonemeClass::Affricate`).
AffricateCh,
AffricateJ,
// `GlottalStop` and `TapFlap` are both classified as `PhonemeClass::Plosive`.
GlottalStop,
TapFlap,
// Lateral (`PhonemeClass::Lateral`).
LateralL,
// Approximants (`PhonemeClass::Approximant`).
ApproximantR,
ApproximantW,
ApproximantJ,
// Additional vowels (also `PhonemeClass::Vowel`).
VowelY,
VowelFrontRoundO,
VowelOpenFrontRoundO,
VowelCloseBackUnrounded,
VowelMidBackUnrounded,
VowelCloseCentral,
VowelCloseCentralRounded,
// Additional plosives (`PhonemeClass::Plosive`).
PlosiveUvularQ,
PlosiveUvularG,
PlosiveRetroT,
PlosiveRetroD,
// Additional fricatives (`PhonemeClass::Fricative`).
FricativeBilabialPh,
FricativeBilabialB,
FricativePalatalC,
FricativePalatalJ,
FricativeUvularCh,
FricativeUvularR,
FricativePharyngealH,
FricativePharyngealA,
FricativeRetroS,
FricativeRetroZ,
FricativeLateralS,
FricativeLateralZ,
FricativeGlottalH,
// Additional nasals (`PhonemeClass::Nasal`).
NasalRetro,
NasalPalatal,
NasalUvular,
// Trills (`PhonemeClass::Trill`).
TrillBilabial,
TrillAlveolar,
TrillUvular,
// Additional approximant (`PhonemeClass::Approximant`).
ApproximantRetro,
// Additional laterals (`PhonemeClass::Lateral`).
LateralPalatal,
LateralVelar,
// Flaps; `Phoneme::class` reports these as `PhonemeClass::Plosive`.
FlapRetro,
FlapLateral,
// Additional affricates (`PhonemeClass::Affricate`).
AffricateTs,
AffricateDz,
AffricateRetro,
AffricateRetroVoiced,
AffricatePf,
AffricateLateral,
// Clicks (`PhonemeClass::Click`).
ClickBilabial,
ClickDental,
ClickAlveolar,
ClickPalatal,
ClickLateral,
// Ejectives (`PhonemeClass::Ejective`).
EjectiveP,
EjectiveT,
EjectiveK,
EjectiveS,
EjectiveCh,
// Implosives (`PhonemeClass::Implosive`).
ImplosiveB,
ImplosiveD,
ImplosiveG,
// Absence of sound (`PhonemeClass::Silence`); synthesized as all-zero samples.
Silence,
}
/// Broad category of a [`Phoneme`], as returned by [`Phoneme::class`].
///
/// The class selects the synthesis routine used for a phoneme and its
/// default duration (see `phoneme_duration`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum PhonemeClass {
// Also used for the glottal stop, tap and flap variants.
Plosive,
Fricative,
Nasal,
Approximant,
Affricate,
Lateral,
Vowel,
Diphthong,
Trill,
Click,
Ejective,
Implosive,
// Only `Phoneme::Silence` maps here.
Silence,
}
impl Phoneme {
#[must_use]
pub fn class(&self) -> PhonemeClass {
match self {
Self::VowelA
| Self::VowelE
| Self::VowelI
| Self::VowelO
| Self::VowelU
| Self::VowelSchwa
| Self::VowelOpenO
| Self::VowelAsh
| Self::VowelNearI
| Self::VowelNearU
| Self::VowelOpenA
| Self::VowelOpenE
| Self::VowelCupV
| Self::VowelBird
| Self::VowelLongI
| Self::VowelY
| Self::VowelFrontRoundO
| Self::VowelOpenFrontRoundO
| Self::VowelCloseBackUnrounded
| Self::VowelMidBackUnrounded
| Self::VowelCloseCentral
| Self::VowelCloseCentralRounded => PhonemeClass::Vowel,
Self::DiphthongAI
| Self::DiphthongAU
| Self::DiphthongOI
| Self::DiphthongEI
| Self::DiphthongOU => PhonemeClass::Diphthong,
Self::PlosiveP
| Self::PlosiveB
| Self::PlosiveT
| Self::PlosiveD
| Self::PlosiveK
| Self::PlosiveG
| Self::PlosiveUvularQ
| Self::PlosiveUvularG
| Self::PlosiveRetroT
| Self::PlosiveRetroD => PhonemeClass::Plosive,
Self::FricativeF
| Self::FricativeV
| Self::FricativeS
| Self::FricativeZ
| Self::FricativeSh
| Self::FricativeZh
| Self::FricativeTh
| Self::FricativeDh
| Self::FricativeH
| Self::FricativeBilabialPh
| Self::FricativeBilabialB
| Self::FricativePalatalC
| Self::FricativePalatalJ
| Self::FricativeUvularCh
| Self::FricativeUvularR
| Self::FricativePharyngealH
| Self::FricativePharyngealA
| Self::FricativeRetroS
| Self::FricativeRetroZ
| Self::FricativeLateralS
| Self::FricativeLateralZ
| Self::FricativeGlottalH => PhonemeClass::Fricative,
Self::NasalM
| Self::NasalN
| Self::NasalNg
| Self::NasalRetro
| Self::NasalPalatal
| Self::NasalUvular => PhonemeClass::Nasal,
Self::AffricateCh
| Self::AffricateJ
| Self::AffricateTs
| Self::AffricateDz
| Self::AffricateRetro
| Self::AffricateRetroVoiced
| Self::AffricatePf
| Self::AffricateLateral => PhonemeClass::Affricate,
Self::GlottalStop => PhonemeClass::Plosive,
Self::TapFlap => PhonemeClass::Plosive,
Self::LateralL | Self::LateralPalatal | Self::LateralVelar => PhonemeClass::Lateral,
Self::ApproximantR
| Self::ApproximantW
| Self::ApproximantJ
| Self::ApproximantRetro => PhonemeClass::Approximant,
Self::TrillBilabial | Self::TrillAlveolar | Self::TrillUvular => PhonemeClass::Trill,
Self::FlapRetro | Self::FlapLateral => PhonemeClass::Plosive,
Self::ClickBilabial
| Self::ClickDental
| Self::ClickAlveolar
| Self::ClickPalatal
| Self::ClickLateral => PhonemeClass::Click,
Self::EjectiveP
| Self::EjectiveT
| Self::EjectiveK
| Self::EjectiveS
| Self::EjectiveCh => PhonemeClass::Ejective,
Self::ImplosiveB | Self::ImplosiveD | Self::ImplosiveG => PhonemeClass::Implosive,
Self::Silence => PhonemeClass::Silence,
}
}
/// Reports whether the phoneme is treated as voiced.
///
/// Everything is voiced unless it appears in the explicit voiceless list
/// below (voiceless stops, fricatives and affricates, the glottal stop, all
/// clicks, all ejectives, and silence).
#[must_use]
pub fn is_voiced(&self) -> bool {
    let voiceless = matches!(
        self,
        Self::Silence
            | Self::GlottalStop
            | Self::PlosiveP
            | Self::PlosiveT
            | Self::PlosiveK
            | Self::PlosiveUvularQ
            | Self::PlosiveRetroT
            | Self::FricativeF
            | Self::FricativeS
            | Self::FricativeSh
            | Self::FricativeTh
            | Self::FricativeH
            | Self::FricativeBilabialPh
            | Self::FricativePalatalC
            | Self::FricativeUvularCh
            | Self::FricativePharyngealH
            | Self::FricativeRetroS
            | Self::FricativeLateralS
            | Self::AffricateCh
            | Self::AffricateTs
            | Self::AffricateRetro
            | Self::AffricatePf
            | Self::AffricateLateral
            | Self::ClickBilabial
            | Self::ClickDental
            | Self::ClickAlveolar
            | Self::ClickPalatal
            | Self::ClickLateral
            | Self::EjectiveP
            | Self::EjectiveT
            | Self::EjectiveK
            | Self::EjectiveS
            | Self::EjectiveCh
    );
    !voiceless
}
#[must_use]
pub fn coarticulation_resistance(&self) -> f32 {
match self {
Self::VowelI | Self::VowelLongI | Self::VowelNearI => 0.9,
Self::VowelU | Self::VowelNearU => 0.85,
Self::FricativeS | Self::FricativeZ | Self::FricativeSh | Self::FricativeZh => 0.85,
Self::AffricateCh | Self::AffricateJ => 0.85,
Self::VowelA | Self::VowelOpenA | Self::VowelAsh => 0.7,
Self::VowelE | Self::VowelO | Self::VowelOpenO | Self::VowelOpenE => 0.7,
Self::PlosiveT | Self::PlosiveD | Self::NasalN | Self::LateralL => 0.75,
Self::PlosiveK | Self::PlosiveG | Self::NasalNg => 0.7,
Self::PlosiveP | Self::PlosiveB | Self::NasalM => 0.65,
Self::FricativeF | Self::FricativeV => 0.6,
Self::FricativeTh | Self::FricativeDh => 0.6,
Self::VowelCupV | Self::VowelBird => 0.5,
Self::ApproximantR | Self::ApproximantJ => 0.55,
Self::ApproximantW => 0.5,
Self::TapFlap => 0.4,
Self::VowelSchwa => 0.2,
Self::FricativeH => 0.15, Self::GlottalStop => 0.1,
Self::Silence => 0.0,
Self::DiphthongAI
| Self::DiphthongAU
| Self::DiphthongOI
| Self::DiphthongEI
| Self::DiphthongOU => 0.6,
Self::ClickBilabial
| Self::ClickDental
| Self::ClickAlveolar
| Self::ClickPalatal
| Self::ClickLateral => 0.9,
Self::EjectiveP | Self::EjectiveT | Self::EjectiveK => 0.85,
Self::EjectiveS | Self::EjectiveCh => 0.85,
Self::ImplosiveB | Self::ImplosiveD | Self::ImplosiveG => 0.7,
_ => match self.class() {
PhonemeClass::Vowel => 0.7,
PhonemeClass::Plosive => 0.7,
PhonemeClass::Fricative => 0.7,
PhonemeClass::Nasal => 0.7,
PhonemeClass::Trill => 0.6,
PhonemeClass::Approximant | PhonemeClass::Lateral => 0.55,
PhonemeClass::Affricate => 0.85,
PhonemeClass::Click => 0.9,
PhonemeClass::Ejective => 0.85,
PhonemeClass::Implosive => 0.7,
PhonemeClass::Diphthong => 0.6,
PhonemeClass::Silence => 0.0,
},
}
}
}
/// Looks up the F2 locus-equation pair for a consonant.
///
/// Returns `Some((a, b))` for the bilabial, alveolar (including the tap) and
/// velar stops and nasals listed below, and `None` for every other phoneme.
/// NOTE(review): the first number looks like a locus frequency in Hz and the
/// second like a slope — confirm against the caller.
#[must_use]
pub fn f2_locus_equation(phoneme: &Phoneme) -> Option<(f32, f32)> {
    let pair = match *phoneme {
        Phoneme::PlosiveP | Phoneme::PlosiveB | Phoneme::NasalM => (900.0, 0.85),
        Phoneme::PlosiveT | Phoneme::PlosiveD | Phoneme::NasalN | Phoneme::TapFlap => {
            (1750.0, 0.55)
        }
        Phoneme::PlosiveK | Phoneme::PlosiveG | Phoneme::NasalNg => (2000.0, 0.70),
        _ => return None,
    };
    Some(pair)
}
/// Returns the formant target used when synthesizing `phoneme`.
///
/// The match is exhaustive with no wildcard arm, so adding a `Phoneme`
/// variant forces a decision here. Targets are built three ways:
/// from a named `Vowel` preset, from five explicit values via
/// `VowelTarget::new`, or from two five-element arrays via
/// `VowelTarget::with_bandwidths`.
/// NOTE(review): the five values are presumably F1..F5 in Hz and the second
/// array the matching bandwidths — confirm against `VowelTarget`.
///
/// Consonants and silence also receive a target. For diphthongs this is the
/// starting target; the end point comes from `diphthong_end_target`.
#[must_use]
pub fn phoneme_formants(phoneme: &Phoneme) -> VowelTarget {
match phoneme {
// --- Vowels ---
Phoneme::VowelA => VowelTarget::from_vowel(Vowel::A),
Phoneme::VowelOpenA => VowelTarget::with_bandwidths(
[745.0, 1100.0, 2440.0, 3300.0, 3750.0],
[85.0, 90.0, 110.0, 130.0, 150.0],
),
Phoneme::VowelE => VowelTarget::from_vowel(Vowel::E),
// The long and plain I share one preset.
Phoneme::VowelI | Phoneme::VowelLongI => VowelTarget::from_vowel(Vowel::I),
Phoneme::VowelO => VowelTarget::from_vowel(Vowel::O),
Phoneme::VowelU => VowelTarget::from_vowel(Vowel::U),
Phoneme::VowelSchwa => VowelTarget::from_vowel(Vowel::Schwa),
Phoneme::VowelBird => VowelTarget::with_bandwidths(
[580.0, 1400.0, 2500.0, 3300.0, 3750.0],
[70.0, 80.0, 100.0, 120.0, 140.0],
),
Phoneme::VowelOpenO => VowelTarget::from_vowel(Vowel::OpenO),
Phoneme::VowelAsh => VowelTarget::from_vowel(Vowel::Ash),
Phoneme::VowelNearI => VowelTarget::from_vowel(Vowel::NearI),
Phoneme::VowelNearU => VowelTarget::from_vowel(Vowel::NearU),
// NOTE(review): VowelOpenE and VowelCupV share one target — confirm intended.
Phoneme::VowelOpenE | Phoneme::VowelCupV => {
VowelTarget::new(600.0, 1770.0, 2500.0, 3300.0, 3750.0)
}
// --- Diphthongs: starting target only ---
Phoneme::DiphthongAI | Phoneme::DiphthongAU => VowelTarget::from_vowel(Vowel::A),
Phoneme::DiphthongOI => VowelTarget::from_vowel(Vowel::OpenO),
Phoneme::DiphthongEI => VowelTarget::from_vowel(Vowel::E),
Phoneme::DiphthongOU => VowelTarget::from_vowel(Vowel::O),
// --- Stops and nasals, grouped so each stop shares a target with its nasal ---
Phoneme::PlosiveP | Phoneme::PlosiveB | Phoneme::NasalM => {
VowelTarget::new(350.0, 900.0, 2400.0, 3300.0, 3750.0)
}
Phoneme::PlosiveT | Phoneme::PlosiveD | Phoneme::NasalN | Phoneme::LateralL => {
VowelTarget::new(400.0, 1750.0, 2600.0, 3300.0, 3750.0)
}
Phoneme::PlosiveK | Phoneme::PlosiveG | Phoneme::NasalNg => {
VowelTarget::new(350.0, 1800.0, 2500.0, 3300.0, 3750.0)
}
// --- Fricatives ---
Phoneme::FricativeF | Phoneme::FricativeV => {
VowelTarget::new(350.0, 1050.0, 2400.0, 3300.0, 3750.0)
}
Phoneme::FricativeS | Phoneme::FricativeZ => {
VowelTarget::new(400.0, 1750.0, 2600.0, 3300.0, 3750.0)
}
Phoneme::FricativeSh | Phoneme::FricativeZh => {
VowelTarget::new(350.0, 1600.0, 2500.0, 3300.0, 3750.0)
}
Phoneme::FricativeTh | Phoneme::FricativeDh => {
VowelTarget::new(350.0, 1400.0, 2500.0, 3300.0, 3750.0)
}
// The H fricative and the glottal stop reuse the schwa preset.
Phoneme::FricativeH => VowelTarget::from_vowel(Vowel::Schwa),
Phoneme::AffricateCh | Phoneme::AffricateJ => {
VowelTarget::new(350.0, 1600.0, 2500.0, 3300.0, 3750.0)
}
Phoneme::GlottalStop => VowelTarget::from_vowel(Vowel::Schwa),
Phoneme::TapFlap => VowelTarget::new(400.0, 1750.0, 2600.0, 3300.0, 3750.0),
// --- Approximants ---
Phoneme::ApproximantR => VowelTarget::new(350.0, 1300.0, 1600.0, 3300.0, 3750.0),
Phoneme::ApproximantW => VowelTarget::new(300.0, 700.0, 2200.0, 3300.0, 3750.0),
Phoneme::ApproximantJ => VowelTarget::new(280.0, 2200.0, 2900.0, 3300.0, 3750.0),
// --- Additional vowels, each with explicit bandwidths ---
Phoneme::VowelY => VowelTarget::with_bandwidths(
[320.0, 1800.0, 2400.0, 3300.0, 3750.0],
[45.0, 70.0, 90.0, 120.0, 140.0],
),
Phoneme::VowelFrontRoundO => VowelTarget::with_bandwidths(
[400.0, 1600.0, 2400.0, 3300.0, 3750.0],
[55.0, 75.0, 95.0, 120.0, 140.0],
),
Phoneme::VowelOpenFrontRoundO => VowelTarget::with_bandwidths(
[550.0, 1500.0, 2400.0, 3300.0, 3750.0],
[70.0, 80.0, 100.0, 120.0, 140.0],
),
Phoneme::VowelCloseBackUnrounded => VowelTarget::with_bandwidths(
[350.0, 1300.0, 2400.0, 3300.0, 3750.0],
[45.0, 65.0, 90.0, 120.0, 140.0],
),
Phoneme::VowelMidBackUnrounded => VowelTarget::with_bandwidths(
[460.0, 1250.0, 2450.0, 3300.0, 3750.0],
[55.0, 70.0, 95.0, 120.0, 140.0],
),
Phoneme::VowelCloseCentral => VowelTarget::with_bandwidths(
[330.0, 1650.0, 2450.0, 3300.0, 3750.0],
[45.0, 70.0, 90.0, 120.0, 140.0],
),
Phoneme::VowelCloseCentralRounded => VowelTarget::with_bandwidths(
[310.0, 1500.0, 2300.0, 3300.0, 3750.0],
[45.0, 70.0, 90.0, 120.0, 140.0],
),
// --- Additional consonants ---
Phoneme::PlosiveUvularQ | Phoneme::PlosiveUvularG => {
VowelTarget::new(350.0, 1100.0, 2300.0, 3300.0, 3750.0)
}
Phoneme::PlosiveRetroT | Phoneme::PlosiveRetroD => {
VowelTarget::new(400.0, 1550.0, 2000.0, 3300.0, 3750.0)
}
Phoneme::FricativeBilabialPh | Phoneme::FricativeBilabialB => {
VowelTarget::new(350.0, 900.0, 2400.0, 3300.0, 3750.0)
}
Phoneme::FricativePalatalC | Phoneme::FricativePalatalJ => {
VowelTarget::new(300.0, 2200.0, 2900.0, 3300.0, 3750.0)
}
Phoneme::FricativeUvularCh | Phoneme::FricativeUvularR => {
VowelTarget::new(350.0, 1100.0, 2300.0, 3300.0, 3750.0)
}
Phoneme::FricativePharyngealH | Phoneme::FricativePharyngealA => {
VowelTarget::new(700.0, 1000.0, 2400.0, 3300.0, 3750.0)
}
Phoneme::FricativeRetroS | Phoneme::FricativeRetroZ => {
VowelTarget::new(400.0, 1550.0, 2000.0, 3300.0, 3750.0)
}
Phoneme::FricativeLateralS | Phoneme::FricativeLateralZ => {
VowelTarget::new(400.0, 1600.0, 2500.0, 3300.0, 3750.0)
}
Phoneme::FricativeGlottalH => VowelTarget::from_vowel(Vowel::Schwa),
Phoneme::NasalRetro => VowelTarget::new(400.0, 1550.0, 2000.0, 3300.0, 3750.0),
Phoneme::NasalPalatal => VowelTarget::new(300.0, 2200.0, 2900.0, 3300.0, 3750.0),
Phoneme::NasalUvular => VowelTarget::new(350.0, 1100.0, 2300.0, 3300.0, 3750.0),
Phoneme::TrillBilabial => VowelTarget::new(350.0, 900.0, 2400.0, 3300.0, 3750.0),
Phoneme::TrillAlveolar => VowelTarget::new(400.0, 1500.0, 2500.0, 3300.0, 3750.0),
Phoneme::TrillUvular => VowelTarget::new(350.0, 1100.0, 2300.0, 3300.0, 3750.0),
Phoneme::ApproximantRetro => VowelTarget::new(350.0, 1300.0, 1600.0, 3300.0, 3750.0),
Phoneme::LateralPalatal => VowelTarget::new(300.0, 2000.0, 2800.0, 3300.0, 3750.0),
Phoneme::LateralVelar => VowelTarget::new(350.0, 1100.0, 2300.0, 3300.0, 3750.0),
Phoneme::FlapRetro => VowelTarget::new(400.0, 1550.0, 2000.0, 3300.0, 3750.0),
Phoneme::FlapLateral => VowelTarget::new(400.0, 1600.0, 2500.0, 3300.0, 3750.0),
Phoneme::AffricateTs | Phoneme::AffricateDz => {
VowelTarget::new(400.0, 1750.0, 2600.0, 3300.0, 3750.0)
}
Phoneme::AffricateRetro | Phoneme::AffricateRetroVoiced => {
VowelTarget::new(400.0, 1550.0, 2000.0, 3300.0, 3750.0)
}
Phoneme::AffricatePf => VowelTarget::new(350.0, 1050.0, 2400.0, 3300.0, 3750.0),
Phoneme::AffricateLateral => VowelTarget::new(400.0, 1600.0, 2500.0, 3300.0, 3750.0),
// --- Clicks ---
Phoneme::ClickBilabial => VowelTarget::new(350.0, 900.0, 2400.0, 3300.0, 3750.0),
Phoneme::ClickDental => VowelTarget::new(400.0, 1500.0, 2600.0, 3300.0, 3750.0),
Phoneme::ClickAlveolar => VowelTarget::new(400.0, 1750.0, 2600.0, 3300.0, 3750.0),
Phoneme::ClickPalatal => VowelTarget::new(350.0, 2000.0, 2800.0, 3300.0, 3750.0),
Phoneme::ClickLateral => VowelTarget::new(400.0, 1500.0, 2500.0, 3300.0, 3750.0),
// --- Ejectives and implosives: same values as the plain P/T/K, S and Ch entries ---
Phoneme::EjectiveP => VowelTarget::new(350.0, 900.0, 2400.0, 3300.0, 3750.0),
Phoneme::EjectiveT => VowelTarget::new(400.0, 1750.0, 2600.0, 3300.0, 3750.0),
Phoneme::EjectiveK => VowelTarget::new(350.0, 1800.0, 2500.0, 3300.0, 3750.0),
Phoneme::EjectiveS => VowelTarget::new(400.0, 1750.0, 2600.0, 3300.0, 3750.0),
Phoneme::EjectiveCh => VowelTarget::new(350.0, 1600.0, 2500.0, 3300.0, 3750.0),
Phoneme::ImplosiveB => VowelTarget::new(350.0, 900.0, 2400.0, 3300.0, 3750.0),
Phoneme::ImplosiveD => VowelTarget::new(400.0, 1750.0, 2600.0, 3300.0, 3750.0),
Phoneme::ImplosiveG => VowelTarget::new(350.0, 1800.0, 2500.0, 3300.0, 3750.0),
// Silence still gets a target so callers always receive a value.
Phoneme::Silence => VowelTarget::new(500.0, 1500.0, 2500.0, 3300.0, 3750.0),
}
}
/// Returns the default duration for `phoneme`, chosen purely by its
/// [`PhonemeClass`].
///
/// NOTE(review): values are presumably seconds, since callers multiply a
/// duration by a sample rate — confirm.
#[must_use]
pub fn phoneme_duration(phoneme: &Phoneme) -> f32 {
    let class = phoneme.class();
    match class {
        PhonemeClass::Diphthong => 0.18,
        PhonemeClass::Vowel | PhonemeClass::Affricate => 0.12,
        PhonemeClass::Fricative | PhonemeClass::Trill | PhonemeClass::Ejective => 0.10,
        PhonemeClass::Plosive | PhonemeClass::Nasal | PhonemeClass::Implosive => 0.08,
        PhonemeClass::Approximant | PhonemeClass::Lateral => 0.07,
        PhonemeClass::Silence => 0.05,
        PhonemeClass::Click => 0.04,
    }
}
/// Derives a spectral-tilt value from the phoneme's first formant.
///
/// Vowels and diphthongs yield `(f1 - 300) / 250` clamped to `[0, 2]`;
/// every other class yields `0.0`.
#[must_use]
pub fn phoneme_spectral_tilt(phoneme: &Phoneme) -> f32 {
    let f1 = phoneme_formants(phoneme).f1;
    let vowel_like = matches!(
        phoneme.class(),
        PhonemeClass::Vowel | PhonemeClass::Diphthong
    );
    if vowel_like {
        ((f1 - 300.0) / 250.0).clamp(0.0, 2.0)
    } else {
        0.0
    }
}
/// Returns the five formant amplitudes for `phoneme`, attenuating the upper
/// formants of open vowels.
///
/// For vowels and diphthongs, "openness" is `(f1 - 300) / 500` clamped to
/// `[0, 1]`; the second to fifth default amplitudes are reduced by up to
/// 5 %, 10 %, 15 % and 15 % respectively, while the first is left untouched.
/// All other classes receive `DEFAULT_AMPLITUDES` unchanged.
#[must_use]
pub fn height_adjusted_amplitudes(phoneme: &Phoneme) -> [f32; 5] {
    let target = phoneme_formants(phoneme);
    let base = crate::formant::DEFAULT_AMPLITUDES;

    let vowel_like = matches!(
        phoneme.class(),
        PhonemeClass::Vowel | PhonemeClass::Diphthong
    );
    if !vowel_like {
        return base;
    }

    let openness = ((target.f1 - 300.0) / 500.0).clamp(0.0, 1.0);
    let attenuate = |amplitude: f32, depth: f32| amplitude * (1.0 - openness * depth);
    [
        base[0],
        attenuate(base[1], 0.05),
        attenuate(base[2], 0.10),
        attenuate(base[3], 0.15),
        attenuate(base[4], 0.15),
    ]
}
/// Relative timing of the phases of a plosive, as produced by
/// [`VoiceOnsetTime::for_plosive`].
///
/// NOTE(review): each field is presumably a fraction of the plosive's total
/// duration (the presets sum to less than 1.0) — confirm against the consumer.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[non_exhaustive]
pub struct VoiceOnsetTime {
/// Share allotted to the closure phase.
pub closure_fraction: f32,
/// Share allotted to the release burst.
pub burst_fraction: f32,
/// Share allotted to aspiration; `0.0` in the presets for B, D, G and the uvular G.
pub aspiration_fraction: f32,
}
impl VoiceOnsetTime {
    /// Returns the timing preset for `phoneme`.
    ///
    /// The basic, uvular and retroflex plosives have dedicated presets; any
    /// other phoneme receives a generic default of `(0.33, 0.10, 0.10)`.
    #[must_use]
    pub fn for_plosive(phoneme: &Phoneme) -> Self {
        // (closure, burst, aspiration)
        let (closure_fraction, burst_fraction, aspiration_fraction) = match *phoneme {
            Phoneme::PlosiveP | Phoneme::PlosiveUvularQ => (0.35, 0.10, 0.25),
            Phoneme::PlosiveT | Phoneme::PlosiveRetroT => (0.30, 0.10, 0.30),
            Phoneme::PlosiveK => (0.30, 0.12, 0.28),
            Phoneme::PlosiveB | Phoneme::PlosiveUvularG => (0.30, 0.08, 0.0),
            Phoneme::PlosiveD | Phoneme::PlosiveRetroD => (0.25, 0.08, 0.0),
            Phoneme::PlosiveG => (0.25, 0.10, 0.0),
            _ => (0.33, 0.10, 0.10),
        };
        Self {
            closure_fraction,
            burst_fraction,
            aspiration_fraction,
        }
    }
}
/// Computes anticipatory nasalization for every phoneme in a sequence.
///
/// The result has one entry per input phoneme. An entry is `Some` only when
/// the phoneme is a vowel or diphthong and the phoneme immediately after it
/// is one that [`Nasalization::for_nasal`] recognises; otherwise it is `None`.
#[must_use]
pub fn detect_nasalization(phonemes: &[Phoneme]) -> Vec<Option<Nasalization>> {
    let mut plan = Vec::with_capacity(phonemes.len());
    for (idx, current) in phonemes.iter().enumerate() {
        let entry = match current.class() {
            PhonemeClass::Vowel | PhonemeClass::Diphthong => phonemes
                .get(idx + 1)
                .and_then(|next| Nasalization::for_nasal(next)),
            _ => None,
        };
        plan.push(entry);
    }
    plan
}
/// Reusable synthesis state: one vocal tract, glottal source, noise generator
/// and output buffer shared across successive `synthesize` calls.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesisContext {
/// Vocal-tract filter; reset at the start of every `synthesize` call.
tract: VocalTract,
/// Glottal excitation source, re-parameterized from the voice on every call.
glottal: crate::glottal::GlottalSource,
/// Noise generator for frication and bursts; seeded with `17` in `new`.
noise: crate::rng::Rng,
/// Filter from the most recent fricative synthesis. It is assigned in
/// `synthesize_fricative_ctx`; no read of it is visible in this part of the file.
fric_filter: Option<FormantFilter>,
/// Output samples; resized per call and returned to the caller as a slice.
buffer: Vec<f32>,
/// Sample rate used to turn durations into sample counts.
sample_rate: f32,
}
impl SynthesisContext {
/// Creates a synthesis context for `voice` at `sample_rate`.
///
/// # Errors
///
/// Returns [`SvaraError::ArticulationFailed`] when `sample_rate` is not a
/// positive, finite number, or when the voice cannot create its glottal
/// source.
pub fn new(voice: &VoiceProfile, sample_rate: f32) -> Result<Self> {
    // Reject unusable rates up front, as the free `synthesize_phoneme`
    // functions do. `synthesize` later casts `duration * sample_rate` to
    // `usize`: NaN or a negative rate yields zero samples, and infinity
    // saturates to `usize::MAX`.
    if sample_rate <= 0.0 || !sample_rate.is_finite() {
        return Err(SvaraError::ArticulationFailed(format!(
            "sample_rate must be positive and finite, got {sample_rate}"
        )));
    }
    let glottal = voice
        .create_glottal_source(sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
    Ok(Self {
        tract: VocalTract::new(sample_rate),
        glottal,
        // Fixed seed, so noise output is repeatable between contexts.
        noise: crate::rng::Rng::new(17),
        fric_filter: None,
        buffer: Vec::new(),
        sample_rate,
    })
}
/// Returns the sample rate this context was created with.
#[must_use]
pub fn sample_rate(&self) -> f32 {
self.sample_rate
}
/// Synthesizes `phoneme` for `duration` into the context's internal buffer
/// and returns the samples as a borrowed slice.
///
/// The vocal tract is reset and the glottal source is re-configured from
/// `voice` before dispatching on the phoneme's class. `nasalization` is only
/// forwarded to the vowel/diphthong path. The amplitude envelope is applied
/// to the finished buffer.
///
/// # Errors
///
/// Returns [`SvaraError::InvalidDuration`] when `duration` is not positive
/// and finite, and propagates any error from the glottal source, the vocal
/// tract or filter construction.
pub fn synthesize(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    duration: f32,
    nasalization: Option<&Nasalization>,
) -> Result<&[f32]> {
    let duration_ok = duration.is_finite() && duration > 0.0;
    if !duration_ok {
        return Err(SvaraError::InvalidDuration(format!(
            "duration must be positive and finite, got {duration}"
        )));
    }

    let num_samples = (duration * self.sample_rate) as usize;
    if num_samples == 0 {
        // Too short to yield a single sample: hand back an empty buffer.
        self.buffer.clear();
        return Ok(self.buffer.as_slice());
    }

    self.buffer.resize(num_samples, 0.0);
    self.tract.reset();

    // Bring the glottal source back to the voice's baseline settings.
    self.glottal
        .set_model(crate::glottal::GlottalModel::Rosenberg);
    self.glottal.set_f0(voice.base_f0)?;
    self.glottal.set_breathiness(voice.breathiness);
    self.glottal.set_jitter(voice.jitter);
    self.glottal.set_shimmer(voice.shimmer);
    self.glottal
        .set_vibrato(voice.vibrato_rate, voice.vibrato_depth);

    // `Phoneme::Silence` is the only phoneme in the Silence class, so a
    // single dispatch on the class covers it.
    match phoneme.class() {
        PhonemeClass::Silence => self.buffer.fill(0.0),
        PhonemeClass::Vowel | PhonemeClass::Diphthong => {
            self.synthesize_voiced(phoneme, voice, num_samples, nasalization)?
        }
        PhonemeClass::Nasal => self.synthesize_nasal_ctx(phoneme, voice, num_samples)?,
        PhonemeClass::Fricative => self.synthesize_fricative_ctx(phoneme, voice, num_samples)?,
        PhonemeClass::Plosive => self.synthesize_plosive_ctx(phoneme, voice, num_samples)?,
        PhonemeClass::Affricate => self.synthesize_affricate_ctx(phoneme, voice, num_samples)?,
        PhonemeClass::Approximant | PhonemeClass::Lateral | PhonemeClass::Trill => {
            self.synthesize_approx_ctx(phoneme, voice, num_samples)?
        }
        PhonemeClass::Click => self.synthesize_click_ctx(phoneme, voice, num_samples)?,
        PhonemeClass::Ejective => self.synthesize_ejective_ctx(phoneme, voice, num_samples)?,
        PhonemeClass::Implosive => self.synthesize_implosive_ctx(phoneme, voice, num_samples)?,
    }

    apply_amplitude_envelope(&mut self.buffer, num_samples);
    Ok(&self.buffer[..num_samples])
}
/// Renders a vowel or diphthong into the first `num_samples` buffer slots.
///
/// * Plain vowel without nasalization: the tract renders the whole span in
///   one `synthesize_into` call.
/// * Diphthong: the formant target is interpolated linearly from the
///   phoneme's start target to `diphthong_end_target` on every sample.
/// * With `nasalization`: nasal coupling stays at zero until the onset
///   sample, then ramps towards `peak_coupling` along an eased curve.
///
/// # Errors
///
/// Propagates errors from `set_formants_from_target`.
fn synthesize_voiced(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    num_samples: usize,
    nasalization: Option<&Nasalization>,
) -> Result<()> {
    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    self.tract.set_formants_from_target(&target)?;
    self.tract.set_nasal_coupling(0.0);

    // Glide end point, present only for diphthongs; `target` is the start.
    let glide_end = (phoneme.class() == PhonemeClass::Diphthong)
        .then(|| voice.apply_formant_scale(&diphthong_end_target(phoneme)));

    // Steady vowel without nasalization: let the tract render the block.
    if nasalization.is_none() && glide_end.is_none() {
        self.tract
            .synthesize_into(&mut self.glottal, &mut self.buffer[..num_samples]);
        return Ok(());
    }

    // (onset sample, ramp length, peak coupling) when nasalization applies.
    let nasal_ramp = match nasalization {
        Some(nasal) => {
            self.tract.set_nasal_place(nasal.place);
            let onset_sample = (nasal.onset * num_samples as f32) as usize;
            // `max(1)` keeps the ramp division well-defined when the onset
            // falls on or after the last sample.
            let ramp_len = num_samples.saturating_sub(onset_sample).max(1);
            Some((onset_sample, ramp_len, nasal.peak_coupling))
        }
        None => None,
    };

    for (i, slot) in self.buffer[..num_samples].iter_mut().enumerate() {
        if let Some(end) = &glide_end {
            let t = i as f32 / num_samples as f32;
            let current = VowelTarget::interpolate(&target, end, t);
            self.tract.set_formants_from_target(&current)?;
        }
        if let Some((onset_sample, ramp_len, peak_coupling)) = nasal_ramp {
            if i >= onset_sample {
                let nt = (i - onset_sample) as f32 / ramp_len as f32;
                self.tract
                    .set_nasal_coupling(peak_coupling * hisab::calc::ease_in_out_smooth(nt));
            }
        }
        *slot = self.tract.process_sample(self.glottal.next_sample());
    }
    Ok(())
}
/// Renders a nasal consonant: the voiced tract output with nasal coupling
/// fixed at `0.8` and the nasal place chosen from the phoneme.
///
/// NOTE(review): only M, N and Ng get a specific place here; the other nasal
/// variants use `NasalPlace::Neutral`, whereas `Nasalization::for_nasal`
/// maps them to Alveolar/Velar — confirm the difference is intended.
///
/// # Errors
///
/// Propagates errors from `set_formants_from_target`.
fn synthesize_nasal_ctx(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    num_samples: usize,
) -> Result<()> {
    let nasal_place = match *phoneme {
        Phoneme::NasalM => NasalPlace::Bilabial,
        Phoneme::NasalN => NasalPlace::Alveolar,
        Phoneme::NasalNg => NasalPlace::Velar,
        _ => NasalPlace::Neutral,
    };

    let scaled = voice.apply_formant_scale(&phoneme_formants(phoneme));
    self.tract.set_formants_from_target(&scaled)?;
    self.tract.set_nasal_coupling(0.8);
    self.tract.set_nasal_place(nasal_place);

    let out = &mut self.buffer[..num_samples];
    self.tract.synthesize_into(&mut self.glottal, out);
    Ok(())
}
/// Renders a fricative as filtered noise, mixed with tract voicing when the
/// phoneme is voiced.
///
/// Voiced: noise at gain `0.5` through the frication filter, plus the tract
/// output scaled by `0.4`. Voiceless: noise at gain `0.6` through the filter.
/// The filter is stored in `self.fric_filter` afterwards.
///
/// # Errors
///
/// Returns [`SvaraError::ArticulationFailed`] when the frication filter
/// cannot be built, and propagates errors from `set_formants_from_target`.
fn synthesize_fricative_ctx(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    num_samples: usize,
) -> Result<()> {
    let noise_formants = fricative_formants(phoneme, self.sample_rate);
    let mut noise_filter = FormantFilter::new(&noise_formants, self.sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    if !phoneme.is_voiced() {
        for slot in self.buffer[..num_samples].iter_mut() {
            let noise = self.noise.next_f32() * 0.6;
            *slot = noise_filter.process_sample(noise);
        }
    } else {
        let scaled = voice.apply_formant_scale(&phoneme_formants(phoneme));
        self.tract.set_formants_from_target(&scaled)?;
        self.tract.set_nasal_coupling(0.0);
        for slot in self.buffer[..num_samples].iter_mut() {
            let noise = self.noise.next_f32() * 0.5;
            let friction = noise_filter.process_sample(noise);
            let voicing = self.tract.process_sample(self.glottal.next_sample()) * 0.4;
            *slot = friction + voicing;
        }
    }

    self.fric_filter = Some(noise_filter);
    Ok(())
}
/// Renders a plosive in three phases: silent closure (first third), a noise
/// burst (about one sixth, at least one sample), then the release.
///
/// The release is breathy tract voicing scaled by `0.5` for voiced phonemes
/// and low-level filtered noise (gain `0.3`) for voiceless ones. Breathiness
/// is raised to `0.4` during a voiced release and restored afterwards.
///
/// # Errors
///
/// Returns [`SvaraError::ArticulationFailed`] when the burst filter cannot be
/// built, and propagates errors from `set_formants_from_target`.
fn synthesize_plosive_ctx(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    num_samples: usize,
) -> Result<()> {
    let closure_end = num_samples / 3;
    let burst_end = closure_end + (num_samples / 6).max(1);
    // Never run the burst (or start the release) past the end of the span.
    let burst_stop = burst_end.min(num_samples);

    let scaled = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let burst_formants = scaled.to_formants();
    let mut burst_filter = FormantFilter::new(&burst_formants, self.sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    self.buffer[..closure_end].fill(0.0);

    for slot in self.buffer[closure_end..burst_stop].iter_mut() {
        *slot = burst_filter.process_sample(self.noise.next_f32() * 0.8);
    }

    if !phoneme.is_voiced() {
        for slot in self.buffer[burst_stop..num_samples].iter_mut() {
            *slot = burst_filter.process_sample(self.noise.next_f32() * 0.3);
        }
        return Ok(());
    }

    self.tract.set_formants_from_target(&scaled)?;
    self.tract.set_nasal_coupling(0.0);
    self.glottal.set_breathiness(0.4);
    for slot in self.buffer[burst_stop..num_samples].iter_mut() {
        *slot = self.tract.process_sample(self.glottal.next_sample()) * 0.5;
    }
    self.glottal.set_breathiness(voice.breathiness);
    Ok(())
}
/// Renders an affricate: silent closure (first quarter), a noise burst
/// (about one eighth, at least one sample), then a frication tail.
///
/// For voiced affricates the tail is an equal mix of tract voicing and
/// filtered noise; for voiceless ones it is filtered noise at gain `0.6`.
///
/// # Errors
///
/// Returns [`SvaraError::ArticulationFailed`] when the frication filter
/// cannot be built, and propagates errors from `set_formants_from_target`.
fn synthesize_affricate_ctx(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    num_samples: usize,
) -> Result<()> {
    let closure_end = num_samples / 4;
    let burst_end = closure_end + (num_samples / 8).max(1);
    let burst_stop = burst_end.min(num_samples);

    let scaled = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let noise_formants = fricative_formants(phoneme, self.sample_rate);
    let mut noise_filter = FormantFilter::new(&noise_formants, self.sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    self.buffer[..closure_end].fill(0.0);

    for slot in self.buffer[closure_end..burst_stop].iter_mut() {
        *slot = noise_filter.process_sample(self.noise.next_f32() * 0.8);
    }

    if !phoneme.is_voiced() {
        for slot in self.buffer[burst_stop..num_samples].iter_mut() {
            *slot = noise_filter.process_sample(self.noise.next_f32() * 0.6);
        }
        return Ok(());
    }

    self.tract.set_formants_from_target(&scaled)?;
    self.tract.set_nasal_coupling(0.0);
    for slot in self.buffer[burst_stop..num_samples].iter_mut() {
        let voiced = self.tract.process_sample(self.glottal.next_sample());
        let fric = noise_filter.process_sample(self.noise.next_f32() * 0.5);
        *slot = voiced * 0.5 + fric * 0.5;
    }
    Ok(())
}
/// Renders an approximant, lateral or trill as tract voicing scaled by `0.7`.
///
/// Breathiness is raised to at least `0.1` for the duration of the call and
/// restored to the voice's own value before returning.
///
/// # Errors
///
/// Propagates errors from `set_formants_from_target`.
fn synthesize_approx_ctx(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    num_samples: usize,
) -> Result<()> {
    let scaled = voice.apply_formant_scale(&phoneme_formants(phoneme));
    self.tract.set_formants_from_target(&scaled)?;
    self.tract.set_nasal_coupling(0.0);

    let breathiness_floor = voice.breathiness.max(0.1);
    self.glottal.set_breathiness(breathiness_floor);

    let out = &mut self.buffer[..num_samples];
    self.tract.synthesize_into(&mut self.glottal, out);
    out.iter_mut().for_each(|sample| *sample *= 0.7);

    self.glottal.set_breathiness(voice.breathiness);
    Ok(())
}
/// Renders a click: silence for the first 60 % of the span, then a filtered
/// noise burst whose envelope decays quadratically from 1 towards 0.
///
/// # Errors
///
/// Returns [`SvaraError::ArticulationFailed`] when the burst filter cannot be
/// built.
fn synthesize_click_ctx(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    num_samples: usize,
) -> Result<()> {
    let scaled = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let burst_formants = scaled.to_formants();
    let mut burst_filter = FormantFilter::new(&burst_formants, self.sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    let burst_start = num_samples * 6 / 10;
    let burst_len = (num_samples - burst_start) as f32;

    let (silence, burst) = self.buffer[..num_samples].split_at_mut(burst_start);
    silence.fill(0.0);

    for (offset, slot) in burst.iter_mut().enumerate() {
        let n = self.noise.next_f32();
        let env = 1.0 - offset as f32 / burst_len;
        *slot = burst_filter.process_sample(n * env * env);
    }
    Ok(())
}
/// Renders an ejective: silent closure (40 %), a full-gain noise burst
/// (20 %), then a decaying tail.
///
/// For the S and Ch ejectives the tail is frication noise through a separate
/// frication filter, fading to half level. For the stop ejectives it is
/// low-level noise through the burst filter, fading linearly to zero.
///
/// # Errors
///
/// Returns [`SvaraError::ArticulationFailed`] when a filter cannot be built.
fn synthesize_ejective_ctx(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    num_samples: usize,
) -> Result<()> {
    let scaled = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let has_frication_tail = matches!(*phoneme, Phoneme::EjectiveS | Phoneme::EjectiveCh);
    let burst_formants = scaled.to_formants();
    let mut burst_filter = FormantFilter::new(&burst_formants, self.sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    let closure_end = num_samples * 4 / 10;
    let burst_end = closure_end + num_samples * 2 / 10;
    let burst_stop = burst_end.min(num_samples);
    // Denominator for the tail's 0..1 progress; at least 1 to avoid 0/0.
    let tail_len = (num_samples - burst_end).max(1) as f32;

    self.buffer[..closure_end].fill(0.0);

    for slot in self.buffer[closure_end..burst_stop].iter_mut() {
        *slot = burst_filter.process_sample(self.noise.next_f32() * 1.0);
    }

    if has_frication_tail {
        let noise_formants = fricative_formants(phoneme, self.sample_rate);
        let mut tail_filter = FormantFilter::new(&noise_formants, self.sample_rate)
            .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
        for (offset, slot) in self.buffer[burst_end..num_samples].iter_mut().enumerate() {
            let t = offset as f32 / tail_len;
            let decay = 1.0 - t * 0.5;
            *slot = tail_filter.process_sample(self.noise.next_f32() * 0.6 * decay);
        }
    } else {
        for (offset, slot) in self.buffer[burst_end..num_samples].iter_mut().enumerate() {
            let t = offset as f32 / tail_len;
            let decay = (1.0 - t).max(0.0);
            *slot = burst_filter.process_sample(self.noise.next_f32() * 0.3 * decay);
        }
    }
    Ok(())
}
/// Renders an implosive: silent closure for the first third, then tract
/// voicing scaled by `0.5` with the glottal source set to creaky (`0.5`).
///
/// The glottal model is set back to Rosenberg before returning.
///
/// # Errors
///
/// Propagates errors from `set_formants_from_target`.
fn synthesize_implosive_ctx(
    &mut self,
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    num_samples: usize,
) -> Result<()> {
    let scaled = voice.apply_formant_scale(&phoneme_formants(phoneme));
    self.tract.set_formants_from_target(&scaled)?;
    self.tract.set_nasal_coupling(0.0);
    self.glottal.set_creaky(0.5);

    let (closure, voiced) = self.buffer[..num_samples].split_at_mut(num_samples / 3);
    closure.fill(0.0);
    for slot in voiced.iter_mut() {
        let excitation = self.glottal.next_sample();
        *slot = self.tract.process_sample(excitation) * 0.5;
    }

    self.glottal
        .set_model(crate::glottal::GlottalModel::Rosenberg);
    Ok(())
}
}
// Module-local alias for the crate's random-number generator type.
type NoiseGen = crate::rng::Rng;
/// Returns the formant target a diphthong glides towards.
///
/// AI, OI and EI end on the I preset; AU and OU end on the U preset. Any
/// other phoneme gets its own `phoneme_formants` target, i.e. no glide.
fn diphthong_end_target(phoneme: &Phoneme) -> VowelTarget {
    let end_vowel = match *phoneme {
        Phoneme::DiphthongAI | Phoneme::DiphthongOI | Phoneme::DiphthongEI => Vowel::I,
        Phoneme::DiphthongAU | Phoneme::DiphthongOU => Vowel::U,
        _ => return phoneme_formants(phoneme),
    };
    VowelTarget::from_vowel(end_vowel)
}
/// Parameters for nasalizing a vowel ahead of a following nasal consonant.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[non_exhaustive]
pub struct Nasalization {
/// Point in the vowel, as a fraction of its sample count, at which nasal
/// coupling starts to ramp up.
pub onset: f32,
/// Coupling value the ramp approaches; it scales the eased ramp before the
/// result is passed to `set_nasal_coupling`.
pub peak_coupling: f32,
/// Nasal place handed to the vocal tract via `set_nasal_place`.
pub place: NasalPlace,
}
impl Nasalization {
    /// Builds the nasalization parameters induced by a following nasal.
    ///
    /// Returns `None` when `nasal` is not one of the nasal phonemes. All
    /// recognised nasals share the same onset (`0.65`) and peak coupling
    /// (`0.4`); only the place differs.
    #[must_use]
    pub fn for_nasal(nasal: &Phoneme) -> Option<Self> {
        let place = match *nasal {
            Phoneme::NasalM => Some(NasalPlace::Bilabial),
            Phoneme::NasalN | Phoneme::NasalRetro | Phoneme::NasalPalatal => {
                Some(NasalPlace::Alveolar)
            }
            Phoneme::NasalNg | Phoneme::NasalUvular => Some(NasalPlace::Velar),
            _ => None,
        };
        place.map(|place| Self {
            onset: 0.65,
            peak_coupling: 0.4,
            place,
        })
    }
}
/// Synthesizes a single phoneme into a freshly allocated sample vector.
///
/// The sample count is `duration * sample_rate` truncated to an integer; a
/// count of zero yields an empty vector. Silence yields all-zero samples, and
/// every other phoneme is dispatched to the routine for its class.
///
/// # Errors
///
/// * [`SvaraError::InvalidDuration`] when `duration` is not positive and finite.
/// * [`SvaraError::ArticulationFailed`] when `sample_rate` is not positive
///   and finite, or when the class-specific routine fails.
pub fn synthesize_phoneme(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    duration: f32,
) -> Result<Vec<f32>> {
    if duration <= 0.0 || !duration.is_finite() {
        return Err(SvaraError::InvalidDuration(format!(
            "duration must be positive and finite, got {duration}"
        )));
    }
    // NaN passes a bare `<= 0.0` test and infinity saturates the cast below
    // to `usize::MAX`, so non-finite rates are rejected explicitly.
    if sample_rate <= 0.0 || !sample_rate.is_finite() {
        return Err(SvaraError::ArticulationFailed(format!(
            "sample_rate must be positive and finite, got {sample_rate}"
        )));
    }
    let num_samples = (duration * sample_rate) as usize;
    if num_samples == 0 {
        return Ok(Vec::new());
    }
    trace!(?phoneme, duration, num_samples, "synthesizing phoneme");
    // `Phoneme::Silence` is the only member of the Silence class, so one
    // dispatch on the class is enough.
    match phoneme.class() {
        PhonemeClass::Silence => Ok(vec![0.0; num_samples]),
        PhonemeClass::Vowel => synthesize_vowel(phoneme, voice, sample_rate, num_samples),
        PhonemeClass::Diphthong => synthesize_diphthong(phoneme, voice, sample_rate, num_samples),
        PhonemeClass::Plosive => synthesize_plosive(phoneme, voice, sample_rate, num_samples),
        PhonemeClass::Fricative => synthesize_fricative(phoneme, voice, sample_rate, num_samples),
        PhonemeClass::Nasal => synthesize_nasal(phoneme, voice, sample_rate, num_samples),
        PhonemeClass::Affricate => synthesize_affricate(phoneme, voice, sample_rate, num_samples),
        PhonemeClass::Approximant | PhonemeClass::Lateral => {
            synthesize_approximant(phoneme, voice, sample_rate, num_samples)
        }
        PhonemeClass::Trill => synthesize_trill(phoneme, voice, sample_rate, num_samples),
        PhonemeClass::Click => synthesize_click(phoneme, voice, sample_rate, num_samples),
        PhonemeClass::Ejective => synthesize_ejective(phoneme, voice, sample_rate, num_samples),
        PhonemeClass::Implosive => synthesize_implosive(phoneme, voice, sample_rate, num_samples),
    }
}
/// Synthesizes a phoneme, applying anticipatory nasalization when requested.
///
/// Nasalization is applied only when `phoneme` is a vowel or diphthong and
/// `nasalization` is `Some`; in every other case this defers to
/// [`synthesize_phoneme`].
///
/// # Errors
///
/// * [`SvaraError::InvalidDuration`] when `duration` is not positive and finite.
/// * [`SvaraError::ArticulationFailed`] when `sample_rate` is not positive
///   and finite, or when synthesis itself fails.
pub fn synthesize_phoneme_nasalized(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    duration: f32,
    nasalization: Option<&Nasalization>,
) -> Result<Vec<f32>> {
    if duration <= 0.0 || !duration.is_finite() {
        return Err(SvaraError::InvalidDuration(format!(
            "duration must be positive and finite, got {duration}"
        )));
    }
    // NaN passes a bare `<= 0.0` test and infinity saturates the cast below
    // to `usize::MAX`, so non-finite rates are rejected explicitly.
    if sample_rate <= 0.0 || !sample_rate.is_finite() {
        return Err(SvaraError::ArticulationFailed(format!(
            "sample_rate must be positive and finite, got {sample_rate}"
        )));
    }
    let num_samples = (duration * sample_rate) as usize;
    if num_samples == 0 {
        return Ok(Vec::new());
    }
    let is_vowel_like = matches!(
        phoneme.class(),
        PhonemeClass::Vowel | PhonemeClass::Diphthong
    );
    match nasalization {
        Some(nasal) if is_vowel_like => {
            synthesize_vowel_nasalized(phoneme, voice, sample_rate, num_samples, nasal)
        }
        _ => synthesize_phoneme(phoneme, voice, sample_rate, duration),
    }
}
/// Renders a steady-state oral vowel of `num_samples` samples.
///
/// The voice-scaled formant target supplies frequency and bandwidth for five
/// formants, while their amplitudes come from `height_adjusted_amplitudes`.
/// The glottal source is run through the vocal tract and the result is faded
/// at both ends by `apply_amplitude_envelope`.
///
/// # Errors
///
/// Propagates a failure from `VocalTract::set_formants`, and maps a glottal
/// source construction failure to [`SvaraError::ArticulationFailed`].
fn synthesize_vowel(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let scaled = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let gains = height_adjusted_amplitudes(phoneme);
    let bank = [
        Formant::new(scaled.f1, scaled.b1, gains[0]),
        Formant::new(scaled.f2, scaled.b2, gains[1]),
        Formant::new(scaled.f3, scaled.b3, gains[2]),
        Formant::new(scaled.f4, scaled.b4, gains[3]),
        Formant::new(scaled.f5, scaled.b5, gains[4]),
    ];

    let mut tract = VocalTract::new(sample_rate);
    tract.set_formants(&bank)?;

    let mut source = match voice.create_glottal_source(sample_rate) {
        Ok(source) => source,
        Err(e) => return Err(SvaraError::ArticulationFailed(e.to_string())),
    };

    // The spectral tilt is looked up but not applied anywhere in this routine.
    let _tilt = phoneme_spectral_tilt(phoneme);

    let mut rendered = tract.synthesize(&mut source, num_samples);
    apply_amplitude_envelope(&mut rendered, num_samples);
    Ok(rendered)
}
/// Renders a vowel or diphthong whose nasal coupling ramps up over time.
///
/// * The tract starts from the voice-scaled formant target of `phoneme`, with
///   the nasal place taken from `nasalization.place`.
/// * For diphthongs the formants are re-interpolated on every sample between
///   the start target and `diphthong_end_target`, at position
///   `i / num_samples`.
/// * From sample `nasalization.onset * num_samples` onwards the nasal coupling
///   is set to `nasalization.peak_coupling` scaled by an eased ramp that runs
///   over the remainder of the segment.
///
/// # Errors
///
/// Propagates failures from `VocalTract::set_formants_from_target`, and maps a
/// glottal source construction failure to [`SvaraError::ArticulationFailed`].
fn synthesize_vowel_nasalized(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
    nasalization: &Nasalization,
) -> Result<Vec<f32>> {
    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let mut tract = VocalTract::new(sample_rate);
    tract.set_formants_from_target(&target)?;
    tract.set_nasal_place(nasalization.place);
    let mut glottal = voice
        .create_glottal_source(sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    let onset_sample = (nasalization.onset * num_samples as f32) as usize;
    // `max(1)` keeps the ramp divisor non-zero when the onset falls at or
    // beyond the end of the segment.
    let ramp_len = num_samples.saturating_sub(onset_sample).max(1);

    let is_diphthong = phoneme.class() == PhonemeClass::Diphthong;
    let end_target = if is_diphthong {
        Some(voice.apply_formant_scale(&diphthong_end_target(phoneme)))
    } else {
        None
    };

    let mut output = Vec::with_capacity(num_samples);
    for i in 0..num_samples {
        if let Some(ref end) = end_target {
            let t = i as f32 / num_samples as f32;
            let current = VowelTarget::interpolate(&target, end, t);
            tract.set_formants_from_target(&current)?;
        }
        if i >= onset_sample {
            let t = (i - onset_sample) as f32 / ramp_len as f32;
            let coupling = nasalization.peak_coupling * hisab::calc::ease_in_out_smooth(t);
            tract.set_nasal_coupling(coupling);
        }
        let excitation = glottal.next_sample();
        output.push(tract.process_sample(excitation));
    }
    apply_amplitude_envelope(&mut output, num_samples);
    Ok(output)
}
/// Renders a diphthong by gliding the vocal tract between two formant targets.
///
/// On every sample the tract is reconfigured to
/// `VowelTarget::interpolate(start, end, i / num_samples)`, so the glide
/// begins exactly on the start target and approaches the end target without
/// reaching position `1.0`. The result is faded at both ends by
/// `apply_amplitude_envelope`.
///
/// # Errors
///
/// Maps a glottal source construction failure to
/// [`SvaraError::ArticulationFailed`] and propagates failures from
/// `VocalTract::set_formants_from_target`.
fn synthesize_diphthong(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let start_target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let end_target = voice.apply_formant_scale(&diphthong_end_target(phoneme));
    let mut glottal = voice
        .create_glottal_source(sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
    let mut tract = VocalTract::new(sample_rate);

    let mut output = Vec::with_capacity(num_samples);
    for i in 0..num_samples {
        let t = i as f32 / num_samples as f32;
        let current = VowelTarget::interpolate(&start_target, &end_target, t);
        tract.set_formants_from_target(&current)?;
        let excitation = glottal.next_sample();
        output.push(tract.process_sample(excitation));
    }
    apply_amplitude_envelope(&mut output, num_samples);
    Ok(output)
}
/// Renders a plosive as closure, burst, aspiration and a trailing phase.
///
/// Phase boundaries are derived from the fractions returned by
/// `VoiceOnsetTime::for_plosive`:
///
/// 1. closure: left silent;
/// 2. burst: formant-filtered noise at gain 0.8 (at least one sample long);
/// 3. aspiration: formant-filtered noise at gain 0.4, fading linearly;
/// 4. tail: for voiced plosives, the glottal source (breathiness 0.3) through
///    the vocal tract at gain 0.5; otherwise filtered noise at gain 0.15.
///
/// # Errors
///
/// Maps filter and glottal source construction failures to
/// [`SvaraError::ArticulationFailed`] and propagates failures from
/// `VocalTract::set_formants_from_target`.
fn synthesize_plosive(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let mut buffer = vec![0.0_f32; num_samples];
    let mut rng = NoiseGen::new(17);

    let timing = VoiceOnsetTime::for_plosive(phoneme);
    let total = num_samples as f32;
    let closure_end = (timing.closure_fraction * total) as usize;
    let burst_end = closure_end + (timing.burst_fraction * total).max(1.0) as usize;
    let aspiration_end = burst_end + (timing.aspiration_fraction * total) as usize;

    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let formants = target.to_formants();
    let mut shaper = FormantFilter::new(&formants, sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    // Burst: full-strength shaped noise right after the closure.
    let burst_stop = burst_end.min(num_samples);
    buffer[closure_end..burst_stop]
        .iter_mut()
        .for_each(|s| *s = shaper.process_sample(rng.next_f32() * 0.8));

    // Aspiration: weaker shaped noise that fades out linearly.
    if aspiration_end > burst_end {
        let span = (aspiration_end - burst_end).max(1) as f32;
        let aspiration_stop = aspiration_end.min(num_samples);
        for (k, s) in buffer[burst_end..aspiration_stop].iter_mut().enumerate() {
            let fade = 1.0 - k as f32 / span;
            *s = shaper.process_sample(rng.next_f32() * 0.4 * fade);
        }
    }

    // Tail: voicing for voiced plosives, residual noise otherwise.
    let voice_start = aspiration_end.min(num_samples);
    if phoneme.is_voiced() {
        let mut source = voice
            .create_glottal_source(sample_rate)
            .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
        source.set_breathiness(0.3);
        let mut tract = VocalTract::new(sample_rate);
        tract.set_formants_from_target(&target)?;
        buffer[voice_start..]
            .iter_mut()
            .for_each(|s| *s = tract.process_sample(source.next_sample()) * 0.5);
    } else if voice_start < num_samples {
        buffer[voice_start..]
            .iter_mut()
            .for_each(|s| *s = shaper.process_sample(rng.next_f32() * 0.15));
    }

    apply_amplitude_envelope(&mut buffer, num_samples);
    Ok(buffer)
}
/// Renders a fricative from formant-shaped noise, adding voicing when the
/// phoneme is voiced.
///
/// * Voiceless: noise at gain 0.6 through the `fricative_formants` filter.
/// * Voiced: noise at gain 0.5 through the same filter, summed with the
///   glottal source passed through the vocal tract at gain 0.4.
///
/// # Errors
///
/// Maps filter and glottal source construction failures to
/// [`SvaraError::ArticulationFailed`] and propagates failures from
/// `VocalTract::set_formants_from_target`.
fn synthesize_fricative(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let mut rng = NoiseGen::new(31);
    let bands = fricative_formants(phoneme, sample_rate);
    let mut shaper = FormantFilter::new(&bands, sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    let mut rendered = Vec::with_capacity(num_samples);
    if !phoneme.is_voiced() {
        rendered.extend((0..num_samples).map(|_| shaper.process_sample(rng.next_f32() * 0.6)));
    } else {
        let mut source = voice
            .create_glottal_source(sample_rate)
            .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
        let mut tract = VocalTract::new(sample_rate);
        tract.set_formants_from_target(&target)?;
        for _ in 0..num_samples {
            let hiss = shaper.process_sample(rng.next_f32() * 0.5);
            let buzz = tract.process_sample(source.next_sample()) * 0.4;
            rendered.push(hiss + buzz);
        }
    }

    apply_amplitude_envelope(&mut rendered, num_samples);
    Ok(rendered)
}
fn fricative_formants(phoneme: &Phoneme, _sample_rate: f32) -> Vec<Formant> {
match phoneme {
Phoneme::FricativeS | Phoneme::FricativeZ => vec![
Formant::new(4500.0, 500.0, 1.0),
Formant::new(7000.0, 800.0, 0.7),
],
Phoneme::FricativeSh | Phoneme::FricativeZh => vec![
Formant::new(2800.0, 600.0, 1.0),
Formant::new(5000.0, 800.0, 0.6),
],
Phoneme::FricativeF | Phoneme::FricativeV => vec![
Formant::new(3000.0, 2000.0, 0.5),
Formant::new(8000.0, 2000.0, 0.3),
],
Phoneme::FricativeTh | Phoneme::FricativeDh => vec![
Formant::new(4000.0, 1500.0, 0.4),
Formant::new(7500.0, 1500.0, 0.3),
],
Phoneme::FricativeH => vec![
Formant::new(1500.0, 800.0, 0.4),
Formant::new(2500.0, 800.0, 0.3),
],
Phoneme::AffricateCh | Phoneme::AffricateJ | Phoneme::EjectiveCh => vec![
Formant::new(2800.0, 600.0, 1.0),
Formant::new(5000.0, 800.0, 0.6),
],
Phoneme::EjectiveS | Phoneme::AffricateTs | Phoneme::AffricateDz => vec![
Formant::new(4500.0, 500.0, 1.0),
Formant::new(7000.0, 800.0, 0.7),
],
Phoneme::AffricateRetro | Phoneme::AffricateRetroVoiced => vec![
Formant::new(3200.0, 600.0, 1.0),
Formant::new(5500.0, 800.0, 0.6),
],
Phoneme::AffricatePf => vec![
Formant::new(3000.0, 2000.0, 0.5),
Formant::new(8000.0, 2000.0, 0.3),
],
Phoneme::AffricateLateral => vec![
Formant::new(3500.0, 800.0, 0.7),
Formant::new(6000.0, 1000.0, 0.4),
],
_ => vec![Formant::new(3000.0, 1000.0, 0.5)],
}
}
/// Renders a nasal consonant: the glottal source through a vocal tract with
/// nasal coupling fixed at 0.8 and a place of articulation chosen from the
/// phoneme.
///
/// The place mapping follows the one the test suite pins for
/// `Nasalization::for_nasal`: retroflex and palatal nasals use the alveolar
/// place and the uvular nasal uses the velar place, instead of falling back
/// to `NasalPlace::Neutral`. Any other phoneme still maps to `Neutral`.
///
/// # Errors
///
/// Maps a glottal source construction failure to
/// [`SvaraError::ArticulationFailed`] and propagates failures from
/// `VocalTract::set_formants_from_target`.
fn synthesize_nasal(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let mut glottal = voice
        .create_glottal_source(sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
    let mut tract = VocalTract::new(sample_rate);
    tract.set_formants_from_target(&target)?;
    tract.set_nasal_coupling(0.8);
    let place = match phoneme {
        Phoneme::NasalM => NasalPlace::Bilabial,
        Phoneme::NasalN | Phoneme::NasalRetro | Phoneme::NasalPalatal => NasalPlace::Alveolar,
        Phoneme::NasalNg | Phoneme::NasalUvular => NasalPlace::Velar,
        _ => NasalPlace::Neutral,
    };
    tract.set_nasal_place(place);
    let mut output = tract.synthesize(&mut glottal, num_samples);
    apply_amplitude_envelope(&mut output, num_samples);
    Ok(output)
}
/// Renders an approximant or lateral: the glottal source through the vocal
/// tract, attenuated to 0.7 of its level.
///
/// The source breathiness is the voice's own value, raised to at least 0.1.
///
/// # Errors
///
/// Maps a glottal source construction failure to
/// [`SvaraError::ArticulationFailed`] and propagates failures from
/// `VocalTract::set_formants_from_target`.
fn synthesize_approximant(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));

    let mut source = voice
        .create_glottal_source(sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
    let breathiness = voice.breathiness.max(0.1);
    source.set_breathiness(breathiness);

    let mut tract = VocalTract::new(sample_rate);
    tract.set_formants_from_target(&target)?;

    let mut rendered = tract.synthesize(&mut source, num_samples);
    rendered.iter_mut().for_each(|s| *s *= 0.7);
    apply_amplitude_envelope(&mut rendered, num_samples);
    Ok(rendered)
}
/// Renders an affricate as closure, noise burst and a frication phase.
///
/// * First quarter: silent closure.
/// * Next eighth (at least one sample): fricative-filtered noise at gain 0.8.
/// * Remainder: for voiced affricates an equal mix of the glottal source
///   through the vocal tract and filtered noise (input gain 0.5); otherwise
///   filtered noise at gain 0.6.
///
/// # Errors
///
/// Maps filter and glottal source construction failures to
/// [`SvaraError::ArticulationFailed`] and propagates failures from
/// `VocalTract::set_formants_from_target`.
fn synthesize_affricate(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let mut buffer = vec![0.0_f32; num_samples];
    let mut rng = NoiseGen::new(23);
    let closure_end = num_samples / 4;
    let burst_end = closure_end + (num_samples / 8).max(1);

    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let fric_bands = fricative_formants(phoneme, sample_rate);
    let mut shaper = FormantFilter::new(&fric_bands, sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    // Clamp the split point so it stays valid even for an empty buffer.
    let (head, release) = buffer.split_at_mut(burst_end.min(num_samples));

    for s in head.iter_mut().skip(closure_end) {
        *s = shaper.process_sample(rng.next_f32() * 0.8);
    }

    if phoneme.is_voiced() {
        let mut source = voice
            .create_glottal_source(sample_rate)
            .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
        let mut tract = VocalTract::new(sample_rate);
        tract.set_formants_from_target(&target)?;
        for s in release.iter_mut() {
            let buzz = tract.process_sample(source.next_sample());
            let hiss = shaper.process_sample(rng.next_f32() * 0.5);
            *s = buzz * 0.5 + hiss * 0.5;
        }
    } else {
        for s in release.iter_mut() {
            *s = shaper.process_sample(rng.next_f32() * 0.6);
        }
    }

    apply_amplitude_envelope(&mut buffer, num_samples);
    Ok(buffer)
}
/// Renders a trill by amplitude-modulating voiced output with a raised
/// cosine.
///
/// The modulation rate is 20 for the uvular trill, 15 for the bilabial trill
/// and 25 otherwise (including the alveolar trill). Because the phase is
/// `TAU * rate * i / sample_rate`, the rate is in cycles per second when
/// `sample_rate` is in samples per second. The modulated signal is further
/// scaled by 0.7.
///
/// # Errors
///
/// Maps a glottal source construction failure to
/// [`SvaraError::ArticulationFailed`] and propagates failures from
/// `VocalTract::set_formants_from_target`.
fn synthesize_trill(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let mut tract = VocalTract::new(sample_rate);
    tract.set_formants_from_target(&target)?;
    let mut source = voice
        .create_glottal_source(sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;

    let trill_rate = match phoneme {
        Phoneme::TrillUvular => 20.0,
        Phoneme::TrillBilabial => 15.0,
        _ => 25.0,
    };

    let mut rendered: Vec<f32> = (0..num_samples)
        .map(|i| {
            let voiced = tract.process_sample(source.next_sample());
            let phase = core::f32::consts::TAU * trill_rate * i as f32 / sample_rate;
            let gate = 0.5 + 0.5 * crate::math::f32::cos(phase);
            voiced * gate * 0.7
        })
        .collect();

    apply_amplitude_envelope(&mut rendered, num_samples);
    Ok(rendered)
}
/// Renders a click: silence for the first six tenths of the segment, then a
/// formant-filtered noise burst.
///
/// The burst gain is the square of a linear ramp from 1 towards 0, so the
/// burst starts at full strength and dies away quickly.
///
/// # Errors
///
/// Maps a filter construction failure to [`SvaraError::ArticulationFailed`].
fn synthesize_click(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let formants = target.to_formants();
    let mut shaper = FormantFilter::new(&formants, sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
    let mut rng = NoiseGen::new(41);

    let mut buffer = vec![0.0_f32; num_samples];
    let burst_start = num_samples * 6 / 10;
    let burst_span = (num_samples - burst_start).max(1) as f32;

    for (k, s) in buffer.iter_mut().skip(burst_start).enumerate() {
        let raw = rng.next_f32();
        let fade = 1.0 - k as f32 / burst_span;
        *s = shaper.process_sample(raw * fade * fade);
    }

    apply_amplitude_envelope(&mut buffer, num_samples);
    Ok(buffer)
}
/// Renders an ejective as closure, burst and release.
///
/// * First four tenths: silent closure.
/// * Next two tenths: full-gain noise through the phoneme's formant filter.
/// * Release: for `EjectiveS` and `EjectiveCh`, noise at gain 0.6 through the
///   `fricative_formants` filter, fading linearly to half strength; for the
///   other ejectives, noise at gain 0.3 through the burst filter, fading
///   linearly to zero.
///
/// # Errors
///
/// Maps filter construction failures to [`SvaraError::ArticulationFailed`].
fn synthesize_ejective(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let formants = target.to_formants();
    let mut burst_filter = FormantFilter::new(&formants, sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
    let mut rng = NoiseGen::new(43);
    let fricated = matches!(phoneme, Phoneme::EjectiveS | Phoneme::EjectiveCh);

    let mut buffer = vec![0.0_f32; num_samples];
    let closure_end = num_samples * 4 / 10;
    let burst_end = closure_end + num_samples * 2 / 10;

    buffer[closure_end..burst_end.min(num_samples)]
        .iter_mut()
        .for_each(|s| *s = burst_filter.process_sample(rng.next_f32()));

    // `max(1)` keeps the divisor non-zero when there is no release phase.
    let release_span = num_samples.saturating_sub(burst_end).max(1) as f32;

    if fricated {
        let fric_bands = fricative_formants(phoneme, sample_rate);
        let mut fric_filter = FormantFilter::new(&fric_bands, sample_rate)
            .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
        for (k, s) in buffer[burst_end..num_samples].iter_mut().enumerate() {
            let progress = k as f32 / release_span;
            let fade = 1.0 - progress * 0.5;
            *s = fric_filter.process_sample(rng.next_f32() * 0.6 * fade);
        }
    } else {
        for (k, s) in buffer[burst_end..num_samples].iter_mut().enumerate() {
            let progress = k as f32 / release_span;
            let fade = (1.0 - progress).max(0.0);
            *s = burst_filter.process_sample(rng.next_f32() * 0.3 * fade);
        }
    }

    apply_amplitude_envelope(&mut buffer, num_samples);
    Ok(buffer)
}
/// Renders an implosive: a silent first third followed by voicing at half
/// gain from a glottal source whose creaky setting is 0.5.
///
/// # Errors
///
/// Maps a glottal source construction failure to
/// [`SvaraError::ArticulationFailed`] and propagates failures from
/// `VocalTract::set_formants_from_target`.
fn synthesize_implosive(
    phoneme: &Phoneme,
    voice: &VoiceProfile,
    sample_rate: f32,
    num_samples: usize,
) -> Result<Vec<f32>> {
    let target = voice.apply_formant_scale(&phoneme_formants(phoneme));
    let mut tract = VocalTract::new(sample_rate);
    tract.set_formants_from_target(&target)?;

    let mut source = voice
        .create_glottal_source(sample_rate)
        .map_err(|e| SvaraError::ArticulationFailed(e.to_string()))?;
    source.set_creaky(0.5);

    let mut buffer = vec![0.0_f32; num_samples];
    let closure_end = num_samples / 3;
    buffer
        .iter_mut()
        .skip(closure_end)
        .for_each(|s| *s = tract.process_sample(source.next_sample()) * 0.5);

    apply_amplitude_envelope(&mut buffer, num_samples);
    Ok(buffer)
}
/// Fades the start and end of `samples` in place.
///
/// The ramp covers one tenth of the buffer, clamped to between 1 and 256
/// samples. Sample `k` from either edge is multiplied by
/// `ease_in_out_smooth(k / ramp_len)`, so the gain is evaluated at 0 on the
/// outermost sample and rises towards the interior. An empty buffer is left
/// untouched. `_total` is accepted but not consulted.
fn apply_amplitude_envelope(samples: &mut [f32], _total: usize) {
    if samples.is_empty() {
        return;
    }
    let ramp_len = (samples.len() / 10).clamp(1, 256);
    let ramp = ramp_len as f32;

    // Fade-in over the leading samples.
    for (step, s) in samples.iter_mut().take(ramp_len).enumerate() {
        *s *= hisab::calc::ease_in_out_smooth(step as f32 / ramp);
    }

    // Fade-out, walking inwards from the final sample.
    for (step, s) in samples.iter_mut().rev().take(ramp_len).enumerate() {
        *s *= hisab::calc::ease_in_out_smooth(step as f32 / ramp);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// True when no sample is NaN or infinite.
    fn all_finite(samples: &[f32]) -> bool {
        samples.iter().all(|s| s.is_finite())
    }

    /// True when at least one sample exceeds the 1e-6 magnitude floor these
    /// tests use to tell signal from silence.
    fn audible(samples: &[f32]) -> bool {
        samples.iter().any(|&s| s.abs() > 1e-6)
    }

    /// Core phonemes report the expected class.
    #[test]
    fn test_phoneme_class() {
        let expectations = [
            (Phoneme::VowelA, PhonemeClass::Vowel),
            (Phoneme::PlosiveP, PhonemeClass::Plosive),
            (Phoneme::FricativeS, PhonemeClass::Fricative),
            (Phoneme::NasalM, PhonemeClass::Nasal),
            (Phoneme::DiphthongAI, PhonemeClass::Diphthong),
            (Phoneme::ApproximantR, PhonemeClass::Approximant),
            (Phoneme::LateralL, PhonemeClass::Lateral),
        ];
        for (phoneme, class) in expectations {
            assert_eq!(phoneme.class(), class);
        }
    }

    /// Voiced/voiceless pairs are classified correctly.
    #[test]
    fn test_voicing() {
        for phoneme in [Phoneme::PlosiveB, Phoneme::VowelA, Phoneme::FricativeZ] {
            assert!(phoneme.is_voiced());
        }
        for phoneme in [Phoneme::PlosiveP, Phoneme::FricativeS] {
            assert!(!phoneme.is_voiced());
        }
    }

    /// A plain vowel yields non-empty, finite, audible output.
    #[test]
    fn test_synthesize_vowel() {
        let voice = VoiceProfile::new_male();
        let result = synthesize_phoneme(&Phoneme::VowelA, &voice, 44100.0, 0.1);
        assert!(result.is_ok());
        let samples = result.unwrap();
        assert!(!samples.is_empty());
        assert!(all_finite(&samples));
        assert!(audible(&samples));
    }

    /// A voiceless fricative synthesizes without error.
    #[test]
    fn test_synthesize_fricative() {
        let voice = VoiceProfile::new_male();
        let result = synthesize_phoneme(&Phoneme::FricativeS, &voice, 44100.0, 0.08);
        assert!(result.is_ok());
    }

    /// A voiceless plosive synthesizes without error.
    #[test]
    fn test_synthesize_plosive() {
        let voice = VoiceProfile::new_male();
        let result = synthesize_phoneme(&Phoneme::PlosiveP, &voice, 44100.0, 0.08);
        assert!(result.is_ok());
    }

    /// Silence renders as all-zero samples.
    #[test]
    fn test_synthesize_silence() {
        let voice = VoiceProfile::new_male();
        let result = synthesize_phoneme(&Phoneme::Silence, &voice, 44100.0, 0.05);
        assert!(result.is_ok());
        let samples = result.unwrap();
        assert!(samples.iter().all(|&s| s.abs() < f32::EPSILON));
    }

    /// A negative duration is rejected.
    #[test]
    fn test_invalid_duration() {
        let voice = VoiceProfile::new_male();
        let outcome = synthesize_phoneme(&Phoneme::VowelA, &voice, 44100.0, -1.0);
        assert!(outcome.is_err());
    }

    /// A phoneme survives a JSON round trip.
    #[test]
    fn test_serde_roundtrip() {
        let original = Phoneme::VowelA;
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Phoneme = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// Only nasal consonants produce a nasalization descriptor.
    #[test]
    fn test_nasalization_for_nasal_phonemes() {
        for nasal in [Phoneme::NasalM, Phoneme::NasalN, Phoneme::NasalNg] {
            assert!(Nasalization::for_nasal(&nasal).is_some());
        }
        for other in [Phoneme::VowelA, Phoneme::PlosiveP] {
            assert!(Nasalization::for_nasal(&other).is_none());
        }
    }

    /// Each nasal maps to the expected place of articulation.
    #[test]
    fn test_nasalization_place_matches() {
        let expectations = [
            (Phoneme::NasalM, NasalPlace::Bilabial),
            (Phoneme::NasalN, NasalPlace::Alveolar),
            (Phoneme::NasalNg, NasalPlace::Velar),
            (Phoneme::NasalRetro, NasalPlace::Alveolar),
            (Phoneme::NasalPalatal, NasalPlace::Alveolar),
            (Phoneme::NasalUvular, NasalPlace::Velar),
        ];
        for (nasal, place) in expectations {
            let n = Nasalization::for_nasal(&nasal).unwrap();
            assert_eq!(n.place, place);
        }
    }

    /// A nasalized vowel yields non-empty, finite output.
    #[test]
    fn test_nasalized_vowel_produces_output() {
        let voice = VoiceProfile::new_male();
        let nasal = Nasalization::for_nasal(&Phoneme::NasalN).unwrap();
        let result =
            synthesize_phoneme_nasalized(&Phoneme::VowelA, &voice, 44100.0, 0.1, Some(&nasal));
        assert!(result.is_ok());
        let samples = result.unwrap();
        assert!(!samples.is_empty());
        assert!(all_finite(&samples));
    }

    /// Nasalization measurably changes the vowel waveform.
    #[test]
    fn test_nasalized_vowel_differs_from_oral() {
        let voice = VoiceProfile::new_male();
        let nasal = Nasalization::for_nasal(&Phoneme::NasalN).unwrap();
        let oral = synthesize_phoneme(&Phoneme::VowelA, &voice, 44100.0, 0.1).unwrap();
        let nasalized =
            synthesize_phoneme_nasalized(&Phoneme::VowelA, &voice, 44100.0, 0.1, Some(&nasal))
                .unwrap();
        // Sum of absolute per-sample differences between the two renderings.
        let diff: f32 = oral
            .iter()
            .zip(nasalized.iter())
            .map(|(a, b)| (a - b).abs())
            .sum();
        assert!(
            diff > 0.01,
            "nasalized vowel should differ from oral: diff={diff}"
        );
    }

    /// A nasalization request on a consonant still succeeds.
    #[test]
    fn test_nasalized_non_vowel_falls_through() {
        let voice = VoiceProfile::new_male();
        let nasal = Nasalization::for_nasal(&Phoneme::NasalN).unwrap();
        let result =
            synthesize_phoneme_nasalized(&Phoneme::FricativeS, &voice, 44100.0, 0.08, Some(&nasal));
        assert!(result.is_ok());
    }

    /// A nasalization descriptor survives a JSON round trip.
    #[test]
    fn test_serde_roundtrip_nasalization() {
        let n = Nasalization::for_nasal(&Phoneme::NasalM).unwrap();
        let encoded = serde_json::to_string(&n).unwrap();
        let n2: Nasalization = serde_json::from_str(&encoded).unwrap();
        assert_eq!(n2.place, NasalPlace::Bilabial);
        assert!((n2.onset - 0.65).abs() < f32::EPSILON);
    }

    /// A synthesis context can be built from a stock voice.
    #[test]
    fn test_synthesis_context_creation() {
        let voice = VoiceProfile::new_male();
        let ctx = SynthesisContext::new(&voice, 44100.0);
        assert!(ctx.is_ok());
    }

    /// The context renders a vowel as non-empty, finite, audible output.
    #[test]
    fn test_synthesis_context_vowel() {
        let voice = VoiceProfile::new_male();
        let mut ctx = SynthesisContext::new(&voice, 44100.0).unwrap();
        let samples = ctx.synthesize(&Phoneme::VowelA, &voice, 0.1, None).unwrap();
        assert!(!samples.is_empty());
        assert!(all_finite(&samples));
        assert!(audible(&samples));
    }

    /// One representative of each core class renders finite output via the context.
    #[test]
    fn test_synthesis_context_all_classes() {
        let voice = VoiceProfile::new_male();
        let mut ctx = SynthesisContext::new(&voice, 44100.0).unwrap();
        let phonemes = [
            Phoneme::VowelA,
            Phoneme::DiphthongAI,
            Phoneme::PlosiveP,
            Phoneme::FricativeS,
            Phoneme::NasalN,
            Phoneme::AffricateCh,
            Phoneme::ApproximantR,
            Phoneme::LateralL,
            Phoneme::Silence,
        ];
        for p in &phonemes {
            let samples = ctx.synthesize(p, &voice, 0.08, None).unwrap();
            assert!(all_finite(&samples), "{p:?} produced non-finite samples");
        }
    }

    /// Reusing a context for a different duration changes the output length.
    #[test]
    fn test_synthesis_context_reuse() {
        let voice = VoiceProfile::new_male();
        let mut ctx = SynthesisContext::new(&voice, 44100.0).unwrap();
        let len1 = ctx
            .synthesize(&Phoneme::VowelA, &voice, 0.1, None)
            .unwrap()
            .len();
        let len2 = ctx
            .synthesize(&Phoneme::VowelI, &voice, 0.05, None)
            .unwrap()
            .len();
        assert_ne!(len1, len2);
    }

    /// The context accepts a nasalization descriptor and stays finite.
    #[test]
    fn test_synthesis_context_with_nasalization() {
        let voice = VoiceProfile::new_male();
        let mut ctx = SynthesisContext::new(&voice, 44100.0).unwrap();
        let nasal = Nasalization::for_nasal(&Phoneme::NasalN).unwrap();
        let samples = ctx
            .synthesize(&Phoneme::VowelA, &voice, 0.1, Some(&nasal))
            .unwrap();
        assert!(all_finite(&samples));
    }

    /// A synthesis context survives a JSON round trip with its sample rate intact.
    #[test]
    fn test_serde_roundtrip_synthesis_context() {
        let voice = VoiceProfile::new_male();
        let ctx = SynthesisContext::new(&voice, 44100.0).unwrap();
        let encoded = serde_json::to_string(&ctx).unwrap();
        let ctx2: SynthesisContext = serde_json::from_str(&encoded).unwrap();
        assert!((ctx2.sample_rate - 44100.0).abs() < f32::EPSILON);
    }

    /// A nasalized diphthong renders finite, audible output.
    #[test]
    fn test_nasalized_diphthong() {
        let voice = VoiceProfile::new_male();
        let nasal = Nasalization::for_nasal(&Phoneme::NasalN).unwrap();
        let result = synthesize_phoneme_nasalized(
            &Phoneme::DiphthongAI,
            &voice,
            44100.0,
            0.15,
            Some(&nasal),
        );
        assert!(result.is_ok());
        let samples = result.unwrap();
        assert!(all_finite(&samples));
        assert!(audible(&samples));
    }

    /// Back-to-back phonemes of different classes render cleanly from one context.
    #[test]
    fn test_synthesis_context_sequential_different_classes() {
        let voice = VoiceProfile::new_male();
        let mut ctx = SynthesisContext::new(&voice, 44100.0).unwrap();

        let s1 = ctx
            .synthesize(&Phoneme::FricativeS, &voice, 0.06, None)
            .unwrap();
        assert!(all_finite(&s1));

        let s2 = ctx
            .synthesize(&Phoneme::VowelA, &voice, 0.08, None)
            .unwrap();
        assert!(all_finite(&s2));
        assert!(audible(&s2));

        let s3 = ctx
            .synthesize(&Phoneme::NasalM, &voice, 0.06, None)
            .unwrap();
        assert!(all_finite(&s3));
    }

    /// Clicks are classified as clicks and are voiceless.
    #[test]
    fn test_click_class() {
        for click in [
            Phoneme::ClickDental,
            Phoneme::ClickAlveolar,
            Phoneme::ClickBilabial,
        ] {
            assert_eq!(click.class(), PhonemeClass::Click);
        }
        assert!(!Phoneme::ClickDental.is_voiced());
    }

    /// Ejectives are classified as ejectives and are voiceless.
    #[test]
    fn test_ejective_class() {
        for ejective in [Phoneme::EjectiveT, Phoneme::EjectiveS] {
            assert_eq!(ejective.class(), PhonemeClass::Ejective);
        }
        assert!(!Phoneme::EjectiveK.is_voiced());
    }

    /// Implosives are classified as implosives and are voiced.
    #[test]
    fn test_implosive_class() {
        assert_eq!(Phoneme::ImplosiveB.class(), PhonemeClass::Implosive);
        assert!(Phoneme::ImplosiveD.is_voiced());
    }

    /// Every click renders finite output.
    #[test]
    fn test_synthesize_all_clicks() {
        let voice = VoiceProfile::new_male();
        let clicks = [
            Phoneme::ClickBilabial,
            Phoneme::ClickDental,
            Phoneme::ClickAlveolar,
            Phoneme::ClickPalatal,
            Phoneme::ClickLateral,
        ];
        for phoneme in clicks {
            let samples = match synthesize_phoneme(&phoneme, &voice, 44100.0, 0.04) {
                Ok(samples) => samples,
                Err(_) => panic!("{phoneme:?} failed"),
            };
            assert!(all_finite(&samples), "{phoneme:?} non-finite");
        }
    }

    /// Every ejective renders finite output.
    #[test]
    fn test_synthesize_all_ejectives() {
        let voice = VoiceProfile::new_male();
        let ejectives = [
            Phoneme::EjectiveP,
            Phoneme::EjectiveT,
            Phoneme::EjectiveK,
            Phoneme::EjectiveS,
            Phoneme::EjectiveCh,
        ];
        for phoneme in ejectives {
            let samples = match synthesize_phoneme(&phoneme, &voice, 44100.0, 0.1) {
                Ok(samples) => samples,
                Err(_) => panic!("{phoneme:?} failed"),
            };
            assert!(all_finite(&samples), "{phoneme:?} non-finite");
        }
    }

    /// Every implosive renders finite, audible output.
    #[test]
    fn test_synthesize_all_implosives() {
        let voice = VoiceProfile::new_male();
        let implosives = [
            Phoneme::ImplosiveB,
            Phoneme::ImplosiveD,
            Phoneme::ImplosiveG,
        ];
        for phoneme in implosives {
            let samples = match synthesize_phoneme(&phoneme, &voice, 44100.0, 0.08) {
                Ok(samples) => samples,
                Err(_) => panic!("{phoneme:?} failed"),
            };
            assert!(all_finite(&samples), "{phoneme:?} non-finite");
            assert!(audible(&samples), "{phoneme:?} silent");
        }
    }

    /// Non-pulmonic consonants render finite output through the context.
    #[test]
    fn test_synthesis_context_non_pulmonic() {
        let voice = VoiceProfile::new_male();
        let mut ctx = SynthesisContext::new(&voice, 44100.0).unwrap();
        let non_pulmonic = [
            Phoneme::ClickDental,
            Phoneme::EjectiveT,
            Phoneme::ImplosiveD,
        ];
        for phoneme in non_pulmonic {
            let samples = ctx.synthesize(&phoneme, &voice, 0.08, None).unwrap();
            assert!(all_finite(&samples), "{phoneme:?} non-finite in ctx");
        }
    }

    /// Non-pulmonic phonemes survive a JSON round trip.
    #[test]
    fn test_serde_roundtrip_non_pulmonic() {
        let non_pulmonic = [
            Phoneme::ClickDental,
            Phoneme::ClickLateral,
            Phoneme::EjectiveP,
            Phoneme::EjectiveS,
            Phoneme::ImplosiveB,
            Phoneme::ImplosiveG,
        ];
        for phoneme in non_pulmonic {
            let encoded = serde_json::to_string(&phoneme).unwrap();
            let decoded: Phoneme = serde_json::from_str(&encoded).unwrap();
            assert_eq!(phoneme, decoded);
        }
    }

    /// The extended IPA vowels render finite, audible output.
    #[test]
    fn test_ipa_vowels_synthesize() {
        let voice = VoiceProfile::new_male();
        let vowels = [
            Phoneme::VowelY,
            Phoneme::VowelFrontRoundO,
            Phoneme::VowelOpenFrontRoundO,
            Phoneme::VowelCloseBackUnrounded,
            Phoneme::VowelMidBackUnrounded,
            Phoneme::VowelCloseCentral,
            Phoneme::VowelCloseCentralRounded,
        ];
        for phoneme in vowels {
            let samples = match synthesize_phoneme(&phoneme, &voice, 44100.0, 0.08) {
                Ok(samples) => samples,
                Err(_) => panic!("{phoneme:?} failed"),
            };
            assert!(all_finite(&samples), "{phoneme:?} non-finite");
            assert!(audible(&samples), "{phoneme:?} silent");
        }
    }

    /// The extended IPA consonants render finite output at their default duration.
    #[test]
    fn test_ipa_consonants_synthesize() {
        let voice = VoiceProfile::new_male();
        let consonants = [
            Phoneme::PlosiveUvularQ,
            Phoneme::PlosiveUvularG,
            Phoneme::PlosiveRetroT,
            Phoneme::PlosiveRetroD,
            Phoneme::FricativeBilabialPh,
            Phoneme::FricativeBilabialB,
            Phoneme::FricativePalatalC,
            Phoneme::FricativePalatalJ,
            Phoneme::FricativeUvularCh,
            Phoneme::FricativeUvularR,
            Phoneme::FricativePharyngealH,
            Phoneme::FricativePharyngealA,
            Phoneme::FricativeRetroS,
            Phoneme::FricativeRetroZ,
            Phoneme::FricativeLateralS,
            Phoneme::FricativeLateralZ,
            Phoneme::FricativeGlottalH,
            Phoneme::NasalRetro,
            Phoneme::NasalPalatal,
            Phoneme::NasalUvular,
            Phoneme::TrillBilabial,
            Phoneme::TrillAlveolar,
            Phoneme::TrillUvular,
            Phoneme::ApproximantRetro,
            Phoneme::LateralPalatal,
            Phoneme::LateralVelar,
            Phoneme::FlapRetro,
            Phoneme::FlapLateral,
            Phoneme::AffricateTs,
            Phoneme::AffricateDz,
            Phoneme::AffricateRetro,
            Phoneme::AffricateRetroVoiced,
            Phoneme::AffricatePf,
            Phoneme::AffricateLateral,
        ];
        for phoneme in consonants {
            let dur = phoneme_duration(&phoneme);
            let samples = match synthesize_phoneme(&phoneme, &voice, 44100.0, dur) {
                Ok(samples) => samples,
                Err(_) => panic!("{phoneme:?} failed"),
            };
            assert!(all_finite(&samples), "{phoneme:?} non-finite");
        }
    }

    /// The hand-maintained inventory below lists at least 100 phonemes.
    #[test]
    fn test_phoneme_count_over_100() {
        let inventory = [
            Phoneme::VowelA,
            Phoneme::VowelE,
            Phoneme::VowelI,
            Phoneme::VowelO,
            Phoneme::VowelU,
            Phoneme::VowelSchwa,
            Phoneme::VowelOpenO,
            Phoneme::VowelAsh,
            Phoneme::VowelNearI,
            Phoneme::VowelNearU,
            Phoneme::VowelOpenA,
            Phoneme::VowelOpenE,
            Phoneme::VowelCupV,
            Phoneme::VowelBird,
            Phoneme::VowelLongI,
            Phoneme::DiphthongAI,
            Phoneme::DiphthongAU,
            Phoneme::DiphthongOI,
            Phoneme::DiphthongEI,
            Phoneme::DiphthongOU,
            Phoneme::PlosiveP,
            Phoneme::PlosiveB,
            Phoneme::PlosiveT,
            Phoneme::PlosiveD,
            Phoneme::PlosiveK,
            Phoneme::PlosiveG,
            Phoneme::FricativeF,
            Phoneme::FricativeV,
            Phoneme::FricativeS,
            Phoneme::FricativeZ,
            Phoneme::FricativeSh,
            Phoneme::FricativeZh,
            Phoneme::FricativeTh,
            Phoneme::FricativeDh,
            Phoneme::FricativeH,
            Phoneme::NasalM,
            Phoneme::NasalN,
            Phoneme::NasalNg,
            Phoneme::AffricateCh,
            Phoneme::AffricateJ,
            Phoneme::GlottalStop,
            Phoneme::TapFlap,
            Phoneme::LateralL,
            Phoneme::ApproximantR,
            Phoneme::ApproximantW,
            Phoneme::ApproximantJ,
            Phoneme::VowelY,
            Phoneme::VowelFrontRoundO,
            Phoneme::VowelOpenFrontRoundO,
            Phoneme::VowelCloseBackUnrounded,
            Phoneme::VowelMidBackUnrounded,
            Phoneme::VowelCloseCentral,
            Phoneme::VowelCloseCentralRounded,
            Phoneme::PlosiveUvularQ,
            Phoneme::PlosiveUvularG,
            Phoneme::PlosiveRetroT,
            Phoneme::PlosiveRetroD,
            Phoneme::FricativeBilabialPh,
            Phoneme::FricativeBilabialB,
            Phoneme::FricativePalatalC,
            Phoneme::FricativePalatalJ,
            Phoneme::FricativeUvularCh,
            Phoneme::FricativeUvularR,
            Phoneme::FricativePharyngealH,
            Phoneme::FricativePharyngealA,
            Phoneme::FricativeRetroS,
            Phoneme::FricativeRetroZ,
            Phoneme::FricativeLateralS,
            Phoneme::FricativeLateralZ,
            Phoneme::FricativeGlottalH,
            Phoneme::NasalRetro,
            Phoneme::NasalPalatal,
            Phoneme::NasalUvular,
            Phoneme::TrillBilabial,
            Phoneme::TrillAlveolar,
            Phoneme::TrillUvular,
            Phoneme::ApproximantRetro,
            Phoneme::LateralPalatal,
            Phoneme::LateralVelar,
            Phoneme::FlapRetro,
            Phoneme::FlapLateral,
            Phoneme::AffricateTs,
            Phoneme::AffricateDz,
            Phoneme::AffricateRetro,
            Phoneme::AffricateRetroVoiced,
            Phoneme::AffricatePf,
            Phoneme::AffricateLateral,
            Phoneme::ClickBilabial,
            Phoneme::ClickDental,
            Phoneme::ClickAlveolar,
            Phoneme::ClickPalatal,
            Phoneme::ClickLateral,
            Phoneme::EjectiveP,
            Phoneme::EjectiveT,
            Phoneme::EjectiveK,
            Phoneme::EjectiveS,
            Phoneme::EjectiveCh,
            Phoneme::ImplosiveB,
            Phoneme::ImplosiveD,
            Phoneme::ImplosiveG,
        ];
        assert!(
            inventory.len() >= 100,
            "expected >= 100 phonemes, got {}",
            inventory.len()
        );
    }
}