Skip to main content

voirs_cli/commands/interactive/
synthesis.rs

//! Real-time synthesis engine for interactive mode.
//!
//! Handles:
//! - Real-time text-to-speech synthesis
//! - Immediate audio playback
//! - Voice switching during a session
//! - Audio parameter adjustments (speed, pitch, volume)
8
9use crate::audio::playback::{AudioData, AudioPlayer, PlaybackConfig};
10use crate::error::{Result, VoirsCliError};
11use std::sync::Arc;
12use tokio::sync::RwLock;
13
14/// Real-time synthesis engine
15pub struct SynthesisEngine {
16    /// VoiRS SDK pipeline for synthesis
17    pipeline: Option<Arc<RwLock<voirs_sdk::VoirsPipeline>>>,
18
19    /// Audio player for immediate playback
20    audio_player: AudioPlayer,
21
22    /// Current synthesis parameters
23    current_speed: f32,
24    current_pitch: f32,
25    current_volume: f32,
26
27    /// Available voices cache
28    available_voices: Vec<String>,
29
30    /// Current voice
31    current_voice: Option<String>,
32}
33
34impl SynthesisEngine {
35    /// Create a new synthesis engine
36    pub async fn new() -> Result<Self> {
37        // Initialize audio player with default config
38        let config = PlaybackConfig::default();
39        let audio_player = AudioPlayer::new(config).map_err(|e| {
40            VoirsCliError::AudioError(format!("Failed to initialize audio player: {}", e))
41        })?;
42
43        // Load available voices
44        let available_voices = Self::load_available_voices().await?;
45
46        Ok(Self {
47            pipeline: None,
48            audio_player,
49            current_speed: 1.0,
50            current_pitch: 0.0,
51            current_volume: 1.0,
52            available_voices,
53            current_voice: None,
54        })
55    }
56
57    /// Load available voices from the system
58    async fn load_available_voices() -> Result<Vec<String>> {
59        // For now, return a list of common voices
60        // In a real implementation, this would query the VoiRS system
61        Ok(vec![
62            "en-us-female-01".to_string(),
63            "en-us-male-01".to_string(),
64            "en-gb-female-01".to_string(),
65            "ja-jp-female-01".to_string(),
66        ])
67    }
68
69    /// Get list of available voices
70    pub async fn list_voices(&self) -> Result<Vec<String>> {
71        Ok(self.available_voices.clone())
72    }
73
74    /// Set the current voice
75    pub async fn set_voice(&mut self, voice: &str) -> Result<()> {
76        // Validate voice exists
77        if !self.available_voices.contains(&voice.to_string()) {
78            return Err(VoirsCliError::VoiceError(format!(
79                "Voice '{}' not found. Available voices: {}",
80                voice,
81                self.available_voices.join(", ")
82            )));
83        }
84
85        // Initialize pipeline if needed
86        if self.pipeline.is_none() {
87            self.pipeline = Some(Arc::new(RwLock::new(self.create_pipeline(voice).await?)));
88        } else {
89            // Switch voice in existing pipeline
90            if let Some(ref pipeline) = self.pipeline {
91                let mut pipeline_guard = pipeline.write().await;
92                pipeline_guard.set_voice(voice).await.map_err(|e| {
93                    VoirsCliError::SynthesisError(format!("Failed to set voice: {}", e))
94                })?;
95            }
96        }
97
98        self.current_voice = Some(voice.to_string());
99        println!("✓ Voice set to: {}", voice);
100
101        Ok(())
102    }
103
104    /// Create a new VoiRS pipeline
105    async fn create_pipeline(&self, voice: &str) -> Result<voirs_sdk::VoirsPipeline> {
106        // Create pipeline using VoiRS SDK builder
107        let pipeline = voirs_sdk::VoirsPipeline::builder()
108            .with_quality(voirs_sdk::QualityLevel::High)
109            .with_voice(voice)
110            .build()
111            .await
112            .map_err(|e| {
113                VoirsCliError::SynthesisError(format!(
114                    "Failed to create VoiRS pipeline for voice '{}': {}",
115                    voice, e
116                ))
117            })?;
118
119        Ok(pipeline)
120    }
121
122    /// Synthesize text to audio
123    pub async fn synthesize(&self, text: &str) -> Result<Vec<f32>> {
124        if self.pipeline.is_none() {
125            return Err(VoirsCliError::SynthesisError(
126                "No voice selected. Use ':voice <voice_name>' to set a voice.".to_string(),
127            ));
128        }
129
130        // Use the VoiRS pipeline for real synthesis
131        if let Some(pipeline) = &self.pipeline {
132            // Build synthesis configuration
133            let mut config = voirs_sdk::types::SynthesisConfig::default();
134            config.speaking_rate = self.current_speed;
135            config.pitch_shift = self.current_pitch;
136            config.volume_gain = self.current_volume;
137
138            // Perform synthesis
139            let pipeline_guard = pipeline.read().await;
140            match pipeline_guard.synthesize_with_config(text, &config).await {
141                Ok(audio_buffer) => {
142                    // Convert AudioBuffer to Vec<f32>
143                    Ok(audio_buffer.samples().to_vec())
144                }
145                Err(e) => {
146                    tracing::warn!("Synthesis failed, falling back to placeholder: {}", e);
147
148                    // Fallback to simple sine wave generation
149                    let sample_rate = 22050;
150                    let duration_ms = text.len() as f32 * 50.0; // Rough estimate
151                    let num_samples = (sample_rate as f32 * duration_ms / 1000.0) as usize;
152
153                    let frequency = 440.0; // A4 note
154                    let mut samples = Vec::with_capacity(num_samples);
155
156                    for i in 0..num_samples {
157                        let t = i as f32 / sample_rate as f32;
158                        let sample = (2.0 * std::f32::consts::PI * frequency * t).sin()
159                            * 0.1
160                            * self.current_volume;
161                        samples.push(sample);
162                    }
163
164                    // Simulate processing time for fallback
165                    tokio::time::sleep(tokio::time::Duration::from_millis(
166                        (text.len() as u64 * 10).min(500),
167                    ))
168                    .await;
169
170                    Ok(samples)
171                }
172            }
173        } else {
174            Err(VoirsCliError::SynthesisError(
175                "No pipeline available for synthesis".to_string(),
176            ))
177        }
178    }
179
180    /// Play audio data
181    pub async fn play_audio(&mut self, audio_data: &[f32]) -> Result<()> {
182        // Convert f32 samples to i16 for AudioData
183        let samples_i16: Vec<i16> = audio_data
184            .iter()
185            .map(|&sample| (sample * i16::MAX as f32) as i16)
186            .collect();
187
188        let audio_data = AudioData {
189            samples: samples_i16,
190            sample_rate: 22050,
191            channels: 1,
192        };
193
194        self.audio_player
195            .play(&audio_data)
196            .await
197            .map_err(|e| VoirsCliError::AudioError(format!("Failed to play audio: {}", e)))?;
198
199        Ok(())
200    }
201
202    /// Set synthesis speed
203    pub async fn set_speed(&mut self, speed: f32) -> Result<()> {
204        self.current_speed = speed.clamp(0.1, 3.0);
205
206        // Apply to pipeline if available
207        if let Some(ref pipeline) = self.pipeline {
208            // In a real implementation, this would configure the pipeline
209            println!("✓ Speed set to: {:.1}x", self.current_speed);
210        }
211
212        Ok(())
213    }
214
215    /// Set synthesis pitch
216    pub async fn set_pitch(&mut self, pitch: f32) -> Result<()> {
217        self.current_pitch = pitch.clamp(-12.0, 12.0);
218
219        // Apply to pipeline if available
220        if let Some(ref pipeline) = self.pipeline {
221            // In a real implementation, this would configure the pipeline
222            println!("✓ Pitch set to: {:.1} semitones", self.current_pitch);
223        }
224
225        Ok(())
226    }
227
228    /// Set synthesis volume
229    pub async fn set_volume(&mut self, volume: f32) -> Result<()> {
230        self.current_volume = volume.clamp(0.0, 2.0);
231
232        // Apply to audio player
233        self.audio_player
234            .set_volume(self.current_volume)
235            .map_err(|e| VoirsCliError::AudioError(format!("Failed to set volume: {}", e)))?;
236
237        println!("✓ Volume set to: {:.1}", self.current_volume);
238
239        Ok(())
240    }
241
242    /// Get current synthesis parameters
243    pub fn current_params(&self) -> (f32, f32, f32) {
244        (self.current_speed, self.current_pitch, self.current_volume)
245    }
246
247    /// Get current voice
248    pub fn current_voice(&self) -> Option<&str> {
249        self.current_voice.as_deref()
250    }
251
252    /// Check if synthesis engine is ready
253    pub fn is_ready(&self) -> bool {
254        self.pipeline.is_some()
255    }
256}