use super::builder_impl::VoirsPipelineBuilder;
/// Generic interface for a builder that is created from either an on/off flag or a
/// configuration value of type `T`, and is then consumed to produce a type-erased value.
///
/// The trait is declared through `async_trait` so that `build` can be an `async fn`.
/// No implementation of this trait appears in the part of the file shown here.
#[async_trait::async_trait]
pub trait FeatureBuilder<T> {
/// Constructs `Self` from an enabled/disabled flag.
fn with_enabled(enabled: bool) -> Self;
/// Constructs `Self` from a configuration value of type `T`.
fn with_config(config: T) -> Self;
/// Consumes the builder and asynchronously yields a boxed `dyn Any + Send + Sync`
/// wrapped in `crate::Result`; callers must downcast the box to recover the concrete type.
async fn build(self) -> crate::Result<Box<dyn std::any::Any + Send + Sync>>;
}
/// Feature-gated builder methods for the optional synthesis capabilities
/// (emotion, cloning, conversion, singing, spatial audio).
///
/// Every method consumes the builder, writes to `self.config.default_synthesis`, and
/// returns the builder so calls can be chained. Each group is only compiled when the
/// matching Cargo feature is enabled.
impl VoirsPipelineBuilder {
    // ------------------------------------------------------------------
    // Emotion control (`emotion` feature)
    // ------------------------------------------------------------------

    /// Sets the `enable_emotion` flag to `enabled`.
    #[cfg(feature = "emotion")]
    pub fn with_emotion_control_enabled(mut self, enabled: bool) -> Self {
        self.config.default_synthesis.enable_emotion = enabled;
        self
    }

    /// Enables emotion control.
    ///
    /// NOTE: `builder` is accepted but none of its settings are copied into the
    /// pipeline configuration by this method; only `enable_emotion` is set to `true`.
    #[cfg(feature = "emotion")]
    pub fn with_emotion_control(
        mut self,
        builder: crate::emotion::EmotionControllerBuilder,
    ) -> Self {
        // Discard explicitly so the omission is visible and `unused_variables` stays quiet.
        let _ = builder;
        self.config.default_synthesis.enable_emotion = true;
        self
    }

    /// Sets the default emotion type and intensity and enables emotion control.
    ///
    /// `intensity` is clamped to `0.0..=1.0`. `f32::clamp` returns NaN for a NaN input,
    /// so a NaN intensity is stored unchanged.
    #[cfg(feature = "emotion")]
    pub fn with_default_emotion(mut self, emotion_type: &str, intensity: f32) -> Self {
        let synthesis = &mut self.config.default_synthesis;
        synthesis.emotion_type = Some(emotion_type.to_string());
        synthesis.emotion_intensity = intensity.clamp(0.0, 1.0);
        synthesis.enable_emotion = true;
        self
    }

    /// Applies a predefined emotion by delegating to [`Self::with_default_emotion`].
    ///
    /// Every preset, including `Neutral` (intensity `0.0`), leaves emotion control enabled.
    #[cfg(feature = "emotion")]
    pub fn with_emotion_preset(self, preset: EmotionPreset) -> Self {
        let (emotion_type, intensity) = match preset {
            EmotionPreset::Happy => ("happy", 0.8),
            EmotionPreset::Sad => ("sad", 0.7),
            EmotionPreset::Excited => ("excited", 0.9),
            EmotionPreset::Calm => ("calm", 0.6),
            EmotionPreset::Angry => ("angry", 0.8),
            EmotionPreset::Neutral => ("neutral", 0.0),
        };
        self.with_default_emotion(emotion_type, intensity)
    }

    /// Sets the `auto_emotion_detection` flag. Does not change `enable_emotion`.
    #[cfg(feature = "emotion")]
    pub fn with_auto_emotion_detection(mut self, enabled: bool) -> Self {
        self.config.default_synthesis.auto_emotion_detection = enabled;
        self
    }

    // ------------------------------------------------------------------
    // Voice cloning (`cloning` feature)
    // ------------------------------------------------------------------

    /// Sets the `enable_cloning` flag to `enabled`.
    #[cfg(feature = "cloning")]
    pub fn with_voice_cloning_enabled(mut self, enabled: bool) -> Self {
        self.config.default_synthesis.enable_cloning = enabled;
        self
    }

    /// Enables voice cloning.
    ///
    /// NOTE: `builder` is accepted but none of its settings are copied into the
    /// pipeline configuration by this method; only `enable_cloning` is set to `true`.
    #[cfg(feature = "cloning")]
    pub fn with_voice_cloning(mut self, builder: crate::cloning::VoiceClonerBuilder) -> Self {
        // Discard explicitly so the omission is visible and `unused_variables` stays quiet.
        let _ = builder;
        self.config.default_synthesis.enable_cloning = true;
        self
    }

    /// Selects the cloning method and enables voice cloning.
    #[cfg(feature = "cloning")]
    pub fn with_cloning_method(mut self, method: CloningMethod) -> Self {
        let synthesis = &mut self.config.default_synthesis;
        synthesis.cloning_method = Some(method);
        synthesis.enable_cloning = true;
        self
    }

    /// Applies a predefined cloning method and quality value and enables voice cloning.
    ///
    /// The method and the enable flag are set through [`Self::with_cloning_method`],
    /// mirroring how the emotion and conversion presets delegate to their setters.
    #[cfg(feature = "cloning")]
    pub fn with_cloning_preset(self, preset: CloningPreset) -> Self {
        let (method, quality) = match preset {
            CloningPreset::HighQuality => (CloningMethod::DeepClone, 0.95),
            CloningPreset::Fast => (CloningMethod::QuickClone, 0.7),
            CloningPreset::Balanced => (CloningMethod::AdaptiveClone, 0.85),
        };
        let mut configured = self.with_cloning_method(method);
        configured.config.default_synthesis.cloning_quality = quality;
        configured
    }

    // ------------------------------------------------------------------
    // Voice conversion (`conversion` feature)
    // ------------------------------------------------------------------

    /// Sets the `enable_conversion` flag to `enabled`.
    #[cfg(feature = "conversion")]
    pub fn with_voice_conversion_enabled(mut self, enabled: bool) -> Self {
        self.config.default_synthesis.enable_conversion = enabled;
        self
    }

    /// Enables voice conversion.
    ///
    /// NOTE: `builder` is accepted but none of its settings are copied into the
    /// pipeline configuration by this method; only `enable_conversion` is set to `true`.
    #[cfg(feature = "conversion")]
    pub fn with_voice_conversion(
        mut self,
        builder: crate::conversion::VoiceConverterBuilder,
    ) -> Self {
        // Discard explicitly so the omission is visible and `unused_variables` stays quiet.
        let _ = builder;
        self.config.default_synthesis.enable_conversion = true;
        self
    }

    /// Sets the conversion target and enables voice conversion.
    #[cfg(feature = "conversion")]
    pub fn with_conversion_target(mut self, target: ConversionTarget) -> Self {
        let synthesis = &mut self.config.default_synthesis;
        synthesis.conversion_target = Some(target);
        synthesis.enable_conversion = true;
        self
    }

    /// Applies a predefined conversion target by delegating to
    /// [`Self::with_conversion_target`].
    #[cfg(feature = "conversion")]
    pub fn with_conversion_preset(self, preset: ConversionPreset) -> Self {
        let target = match preset {
            ConversionPreset::MaleToFemale => ConversionTarget::Gender(Gender::Female),
            ConversionPreset::FemaleToMale => ConversionTarget::Gender(Gender::Male),
            ConversionPreset::YoungToOld => ConversionTarget::Age(AgeGroup::Senior),
            ConversionPreset::OldToYoung => ConversionTarget::Age(AgeGroup::Young),
        };
        self.with_conversion_target(target)
    }

    /// Sets the `realtime_conversion` flag. Does not change `enable_conversion`.
    #[cfg(feature = "conversion")]
    pub fn with_realtime_conversion(mut self, enabled: bool) -> Self {
        self.config.default_synthesis.realtime_conversion = enabled;
        self
    }

    // ------------------------------------------------------------------
    // Singing synthesis (`singing` feature)
    // ------------------------------------------------------------------

    /// Sets the `enable_singing` flag to `enabled`.
    #[cfg(feature = "singing")]
    pub fn with_singing_synthesis_enabled(mut self, enabled: bool) -> Self {
        self.config.default_synthesis.enable_singing = enabled;
        self
    }

    /// Enables singing synthesis.
    ///
    /// NOTE: `builder` is accepted but none of its settings are copied into the
    /// pipeline configuration by this method; only `enable_singing` is set to `true`.
    #[cfg(feature = "singing")]
    pub fn with_singing_synthesis(
        mut self,
        builder: crate::singing::SingingControllerBuilder,
    ) -> Self {
        // Discard explicitly so the omission is visible and `unused_variables` stays quiet.
        let _ = builder;
        self.config.default_synthesis.enable_singing = true;
        self
    }

    /// Sets the singing voice type and enables singing synthesis.
    #[cfg(feature = "singing")]
    pub fn with_singing_voice_type(mut self, voice_type: SingingVoiceType) -> Self {
        let synthesis = &mut self.config.default_synthesis;
        synthesis.singing_voice_type = Some(voice_type);
        synthesis.enable_singing = true;
        self
    }

    /// Applies a predefined voice type and singing technique and enables singing synthesis.
    ///
    /// The voice type and the enable flag are set through
    /// [`Self::with_singing_voice_type`].
    #[cfg(feature = "singing")]
    pub fn with_singing_preset(self, preset: SingingPreset) -> Self {
        let (voice_type, technique) = match preset {
            SingingPreset::PopSinger => (SingingVoiceType::PopVocalist, SingingTechnique::modern()),
            SingingPreset::OperaSinger => {
                (SingingVoiceType::OperaSinger, SingingTechnique::classical())
            }
            SingingPreset::JazzSinger => (SingingVoiceType::JazzVocalist, SingingTechnique::jazz()),
            SingingPreset::RockSinger => (SingingVoiceType::RockVocalist, SingingTechnique::rock()),
        };
        let mut configured = self.with_singing_voice_type(voice_type);
        configured.config.default_synthesis.singing_technique = Some(technique);
        configured
    }

    /// Stores the musical key. Does not change `enable_singing`.
    #[cfg(feature = "singing")]
    pub fn with_musical_key(mut self, key: MusicalKey) -> Self {
        self.config.default_synthesis.musical_key = Some(key);
        self
    }

    /// Stores the tempo in beats per minute. Does not change `enable_singing`.
    ///
    /// The value is stored as given; no range validation is performed here, so zero,
    /// negative, or non-finite values are accepted.
    #[cfg(feature = "singing")]
    pub fn with_tempo(mut self, bpm: f32) -> Self {
        self.config.default_synthesis.tempo = Some(bpm);
        self
    }

    // ------------------------------------------------------------------
    // Spatial audio (`spatial` feature)
    // ------------------------------------------------------------------

    /// Sets the `enable_spatial` flag to `enabled`.
    #[cfg(feature = "spatial")]
    pub fn with_spatial_audio_enabled(mut self, enabled: bool) -> Self {
        self.config.default_synthesis.enable_spatial = enabled;
        self
    }

    /// Enables spatial audio.
    ///
    /// NOTE: `builder` is accepted but none of its settings are copied into the
    /// pipeline configuration by this method; only `enable_spatial` is set to `true`.
    #[cfg(feature = "spatial")]
    pub fn with_spatial_audio(
        mut self,
        builder: crate::spatial::SpatialAudioControllerBuilder,
    ) -> Self {
        // Discard explicitly so the omission is visible and `unused_variables` stays quiet.
        let _ = builder;
        self.config.default_synthesis.enable_spatial = true;
        self
    }

    /// Sets the listener position and enables spatial audio.
    #[cfg(feature = "spatial")]
    pub fn with_listener_position(mut self, position: Position3D) -> Self {
        let synthesis = &mut self.config.default_synthesis;
        synthesis.listener_position = Some(position);
        synthesis.enable_spatial = true;
        self
    }

    /// Applies predefined HRTF, room-size and reverb settings and enables spatial audio.
    ///
    /// The values are routed through [`Self::with_hrtf_processing`] and
    /// [`Self::with_room_acoustics`]; all preset reverb levels already lie inside the
    /// `0.0..=1.0` range that `with_room_acoustics` clamps to, so they are stored unchanged.
    #[cfg(feature = "spatial")]
    pub fn with_spatial_preset(self, preset: SpatialPreset) -> Self {
        let (hrtf_enabled, room_size, reverb_level) = match preset {
            SpatialPreset::Headphones => (true, RoomSize::Small, 0.2),
            SpatialPreset::Speakers => (false, RoomSize::Medium, 0.4),
            SpatialPreset::VirtualReality => (true, RoomSize::Large, 0.6),
            SpatialPreset::AugmentedReality => (true, RoomSize::Medium, 0.3),
        };
        let mut configured = self
            .with_hrtf_processing(hrtf_enabled)
            .with_room_acoustics(room_size, reverb_level);
        configured.config.default_synthesis.enable_spatial = true;
        configured
    }

    /// Sets the `hrtf_enabled` flag. Does not change `enable_spatial`.
    #[cfg(feature = "spatial")]
    pub fn with_hrtf_processing(mut self, enabled: bool) -> Self {
        self.config.default_synthesis.hrtf_enabled = enabled;
        self
    }

    /// Sets the room size and reverb level. Does not change `enable_spatial`.
    ///
    /// `reverb_level` is clamped to `0.0..=1.0`. `f32::clamp` returns NaN for a NaN
    /// input, so a NaN level is stored unchanged.
    #[cfg(feature = "spatial")]
    pub fn with_room_acoustics(mut self, room_size: RoomSize, reverb_level: f32) -> Self {
        let synthesis = &mut self.config.default_synthesis;
        synthesis.room_size = Some(room_size);
        synthesis.reverb_level = reverb_level.clamp(0.0, 1.0);
        self
    }
}
/// Named emotion shortcuts consumed by `VoirsPipelineBuilder::with_emotion_preset`
/// (compiled with the `emotion` feature), which maps each variant to an emotion-type
/// string and an intensity value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmotionPreset {
/// Mapped to emotion type `"happy"` at intensity `0.8`.
Happy,
/// Mapped to emotion type `"sad"` at intensity `0.7`.
Sad,
/// Mapped to emotion type `"excited"` at intensity `0.9`.
Excited,
/// Mapped to emotion type `"calm"` at intensity `0.6`.
Calm,
/// Mapped to emotion type `"angry"` at intensity `0.8`.
Angry,
/// Mapped to emotion type `"neutral"` at intensity `0.0`.
Neutral,
}
/// Named voice-cloning shortcuts consumed by `VoirsPipelineBuilder::with_cloning_preset`
/// (compiled with the `cloning` feature), which maps each variant to a [`CloningMethod`]
/// and a quality value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CloningPreset {
/// Mapped to `CloningMethod::DeepClone` with quality `0.95`.
HighQuality,
/// Mapped to `CloningMethod::QuickClone` with quality `0.7`.
Fast,
/// Mapped to `CloningMethod::AdaptiveClone` with quality `0.85`.
Balanced,
}
/// Named voice-conversion shortcuts consumed by
/// `VoirsPipelineBuilder::with_conversion_preset` (compiled with the `conversion`
/// feature), which maps each variant to a [`ConversionTarget`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConversionPreset {
/// Mapped to `ConversionTarget::Gender(Gender::Female)`.
MaleToFemale,
/// Mapped to `ConversionTarget::Gender(Gender::Male)`.
FemaleToMale,
/// Mapped to `ConversionTarget::Age(AgeGroup::Senior)`.
YoungToOld,
/// Mapped to `ConversionTarget::Age(AgeGroup::Young)`.
OldToYoung,
}
/// Named singing shortcuts consumed by `VoirsPipelineBuilder::with_singing_preset`
/// (compiled with the `singing` feature), which maps each variant to a
/// [`SingingVoiceType`] and a [`SingingTechnique`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SingingPreset {
/// Mapped to `SingingVoiceType::PopVocalist` with `SingingTechnique::modern()`.
PopSinger,
/// Mapped to `SingingVoiceType::OperaSinger` with `SingingTechnique::classical()`.
OperaSinger,
/// Mapped to `SingingVoiceType::JazzVocalist` with `SingingTechnique::jazz()`.
JazzSinger,
/// Mapped to `SingingVoiceType::RockVocalist` with `SingingTechnique::rock()`.
RockSinger,
}
/// Named spatial-audio shortcuts consumed by `VoirsPipelineBuilder::with_spatial_preset`
/// (compiled with the `spatial` feature), which maps each variant to an HRTF flag, a
/// [`RoomSize`] and a reverb level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpatialPreset {
/// HRTF enabled, `RoomSize::Small`, reverb level `0.2`.
Headphones,
/// HRTF disabled, `RoomSize::Medium`, reverb level `0.4`.
Speakers,
/// HRTF enabled, `RoomSize::Large`, reverb level `0.6`.
VirtualReality,
/// HRTF enabled, `RoomSize::Medium`, reverb level `0.3`.
AugmentedReality,
}
/// Voice-cloning method stored in the synthesis configuration by
/// `with_cloning_method` / `with_cloning_preset`. Serializable through serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum CloningMethod {
/// Method chosen by `CloningPreset::Fast`.
QuickClone,
/// Method chosen by `CloningPreset::HighQuality`.
DeepClone,
/// Method chosen by `CloningPreset::Balanced`.
AdaptiveClone,
}
/// Target of a voice conversion, stored in the synthesis configuration by
/// `with_conversion_target` / `with_conversion_preset`. Serializable through serde.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ConversionTarget {
/// Convert towards the given [`Gender`].
Gender(Gender),
/// Convert towards the given [`AgeGroup`].
Age(AgeGroup),
/// Convert towards a voice named by a string.
/// NOTE(review): presumably a voice identifier or name; the expected format is not
/// defined in this file — confirm against the consumer.
Voice(String),
}
/// Singing voice category stored in the synthesis configuration by
/// `with_singing_voice_type` / `with_singing_preset`. Serializable through serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum SingingVoiceType {
/// Voice type chosen by `SingingPreset::PopSinger`.
PopVocalist,
/// Voice type chosen by `SingingPreset::OperaSinger`.
OperaSinger,
/// Voice type chosen by `SingingPreset::JazzSinger`.
JazzVocalist,
/// Voice type chosen by `SingingPreset::RockSinger`.
RockVocalist,
/// Not selected by any `SingingPreset`; only reachable via `with_singing_voice_type`.
Choir,
}
/// Musical key stored in the synthesis configuration by `with_musical_key`.
///
/// Twelve variants named C through B, with the accidentals spelled as sharps. The type
/// carries no mode (major/minor) information. Serializable through serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum MusicalKey {
C,
CSharp,
D,
DSharp,
E,
F,
FSharp,
G,
GSharp,
A,
ASharp,
B,
}
/// Room size stored in the synthesis configuration by `with_room_acoustics` /
/// `with_spatial_preset`. Serializable through serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum RoomSize {
/// Used by `SpatialPreset::Headphones`.
Small,
/// Used by `SpatialPreset::Speakers` and `SpatialPreset::AugmentedReality`.
Medium,
/// Used by `SpatialPreset::VirtualReality`.
Large,
/// Not used by any `SpatialPreset`; only reachable via `with_room_acoustics`.
Huge,
}
/// Gender payload of `ConversionTarget::Gender`. Serializable through serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum Gender {
/// Target chosen by `ConversionPreset::FemaleToMale`.
Male,
/// Target chosen by `ConversionPreset::MaleToFemale`.
Female,
}
/// Age-group payload of `ConversionTarget::Age`. Serializable through serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum AgeGroup {
/// Not selected by any `ConversionPreset`.
Child,
/// Target chosen by `ConversionPreset::OldToYoung`.
Young,
/// Not selected by any `ConversionPreset`.
Adult,
/// Target chosen by `ConversionPreset::YoungToOld`.
Senior,
}
/// Three `f32` coordinates; stored as the listener position by
/// `with_listener_position`. Serializable through serde.
///
/// NOTE(review): units and axis orientation are not defined in this file — confirm
/// against the spatial audio implementation.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Position3D {
/// X coordinate.
pub x: f32,
/// Y coordinate.
pub y: f32,
/// Z coordinate.
pub z: f32,
}
/// Four `f32` parameters describing a singing style; stored in the synthesis
/// configuration by `with_singing_preset`. Serializable through serde.
///
/// The built-in constructors (`modern`, `classical`, `jazz`, `rock`) only use values
/// between `0.0` and `1.0`, but the fields are public and no range is enforced here.
/// NOTE(review): the exact meaning of each parameter is defined by the consumer of this
/// struct, not in this file — confirm before relying on the descriptions below.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct SingingTechnique {
/// Breath-control amount; the built-in presets use `0.6`–`0.9`.
pub breath_control: f32,
/// Vibrato depth; the built-in presets use `0.2`–`0.5`.
pub vibrato_depth: f32,
/// Vocal-fry amount; the built-in presets use `0.0`–`0.4`.
pub vocal_fry: f32,
/// Head-voice ratio; the built-in presets use `0.3`–`0.8`.
pub head_voice_ratio: f32,
}
/// Built-in technique presets.
///
/// The parameter sets live in private associated constants; each public constructor
/// simply hands back a fresh value of the matching constant.
impl SingingTechnique {
    /// Parameter set returned by [`Self::modern`].
    const MODERN: Self = Self {
        breath_control: 0.7,
        vibrato_depth: 0.3,
        vocal_fry: 0.2,
        head_voice_ratio: 0.6,
    };

    /// Parameter set returned by [`Self::classical`].
    const CLASSICAL: Self = Self {
        breath_control: 0.9,
        vibrato_depth: 0.5,
        vocal_fry: 0.0,
        head_voice_ratio: 0.8,
    };

    /// Parameter set returned by [`Self::jazz`].
    const JAZZ: Self = Self {
        breath_control: 0.6,
        vibrato_depth: 0.4,
        vocal_fry: 0.3,
        head_voice_ratio: 0.5,
    };

    /// Parameter set returned by [`Self::rock`].
    const ROCK: Self = Self {
        breath_control: 0.8,
        vibrato_depth: 0.2,
        vocal_fry: 0.4,
        head_voice_ratio: 0.3,
    };

    /// Technique used by the pop singing preset:
    /// breath control 0.7, vibrato 0.3, vocal fry 0.2, head-voice ratio 0.6.
    pub fn modern() -> Self {
        Self::MODERN
    }

    /// Technique used by the opera singing preset:
    /// breath control 0.9, vibrato 0.5, vocal fry 0.0, head-voice ratio 0.8.
    pub fn classical() -> Self {
        Self::CLASSICAL
    }

    /// Technique used by the jazz singing preset:
    /// breath control 0.6, vibrato 0.4, vocal fry 0.3, head-voice ratio 0.5.
    pub fn jazz() -> Self {
        Self::JAZZ
    }

    /// Technique used by the rock singing preset:
    /// breath control 0.8, vibrato 0.2, vocal fry 0.4, head-voice ratio 0.3.
    pub fn rock() -> Self {
        Self::ROCK
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Emotion presets must enable emotion control and store the mapped type and intensity.
    ///
    /// The feature gate sits on the test function itself, so the test is absent from the
    /// run (instead of passing without executing anything) when `emotion` is disabled.
    #[cfg(feature = "emotion")]
    #[test]
    fn test_emotion_presets() {
        let happy_builder = VoirsPipelineBuilder::new().with_emotion_preset(EmotionPreset::Happy);
        assert!(happy_builder.config.default_synthesis.enable_emotion);
        assert_eq!(
            happy_builder.config.default_synthesis.emotion_type,
            Some("happy".to_string())
        );
        assert_eq!(
            happy_builder.config.default_synthesis.emotion_intensity,
            0.8
        );

        let calm_builder = VoirsPipelineBuilder::new().with_emotion_preset(EmotionPreset::Calm);
        assert!(calm_builder.config.default_synthesis.enable_emotion);
        assert_eq!(
            calm_builder.config.default_synthesis.emotion_type,
            Some("calm".to_string())
        );
        assert_eq!(calm_builder.config.default_synthesis.emotion_intensity, 0.6);
    }

    /// Presets from different features must compose without overwriting each other.
    ///
    /// Gated on the function for the same reason as `test_emotion_presets`.
    #[cfg(all(feature = "emotion", feature = "spatial"))]
    #[test]
    fn test_feature_combinations() {
        let combined_builder = VoirsPipelineBuilder::new()
            .with_emotion_preset(EmotionPreset::Happy)
            .with_spatial_preset(SpatialPreset::Headphones);
        assert!(combined_builder.config.default_synthesis.enable_emotion);
        assert!(combined_builder.config.default_synthesis.enable_spatial);
        assert!(combined_builder.config.default_synthesis.hrtf_enabled);
    }

    /// Every built-in technique constructor must return exactly its documented values.
    #[test]
    fn test_singing_technique_presets() {
        let modern = SingingTechnique::modern();
        assert_eq!(modern.breath_control, 0.7);
        assert_eq!(modern.vibrato_depth, 0.3);
        assert_eq!(modern.vocal_fry, 0.2);
        assert_eq!(modern.head_voice_ratio, 0.6);

        let classical = SingingTechnique::classical();
        assert_eq!(classical.breath_control, 0.9);
        assert_eq!(classical.vibrato_depth, 0.5);
        assert_eq!(classical.vocal_fry, 0.0);
        assert_eq!(classical.head_voice_ratio, 0.8);

        assert_eq!(
            SingingTechnique::jazz(),
            SingingTechnique {
                breath_control: 0.6,
                vibrato_depth: 0.4,
                vocal_fry: 0.3,
                head_voice_ratio: 0.5,
            }
        );
        assert_eq!(
            SingingTechnique::rock(),
            SingingTechnique {
                breath_control: 0.8,
                vibrato_depth: 0.2,
                vocal_fry: 0.4,
                head_voice_ratio: 0.3,
            }
        );
    }
}