active_call/synthesis/
supertonic.rs1use crate::offline::get_offline_models;
2use crate::synthesis::{SynthesisClient, SynthesisEvent, SynthesisOption, SynthesisType};
3use anyhow::{Result, anyhow};
4use async_trait::async_trait;
5use audio_codec::Resampler;
6use bytes::Bytes;
7use futures::stream::BoxStream;
8use tokio::sync::mpsc;
9use tokio_stream::wrappers::UnboundedReceiverStream;
10use tokio_util::sync::CancellationToken;
11use tracing::{debug, warn};
12
13pub struct SupertonicTtsClient {
14 voice_style: String,
15 speed: f32,
16 target_rate: i32,
17 tx: Option<mpsc::UnboundedSender<(Option<usize>, Result<SynthesisEvent>)>>,
18 token: CancellationToken,
19}
20
21impl SupertonicTtsClient {
22 pub fn create(_streaming: bool, option: &SynthesisOption) -> Result<Box<dyn SynthesisClient>> {
23 let voice_style = option.speaker.clone().unwrap_or_else(|| "M1".to_string());
24 let speed = option.speed.unwrap_or(1.0);
25 let target_rate = option.samplerate.unwrap_or(16000);
26
27 Ok(Box::new(Self {
28 voice_style,
29 speed,
30 target_rate,
31 tx: None,
32 token: CancellationToken::new(),
33 }))
34 }
35
36 fn ensure_models_initialized() -> Result<()> {
37 if get_offline_models().is_none() {
38 anyhow::bail!(
39 "Offline models not initialized. Please call init_offline_models() first."
40 );
41 }
42 Ok(())
43 }
44
45 async fn synthesize_text(&self, text: String, cmd_seq: Option<usize>) -> Result<()> {
46 let Some(tx) = self.tx.as_ref() else {
47 return Ok(());
48 };
49
50 let models =
51 get_offline_models().ok_or_else(|| anyhow!("offline models not initialized"))?;
52
53 let voice_style = self.voice_style.clone();
54 let speed = self.speed;
55 let target_rate = self.target_rate;
56 let tx_clone = tx.clone();
57 let language = "en".to_string();
59
60 let tts_arc = models.get_supertonic().await?;
61
62 tokio::task::spawn_blocking(move || {
64 let mut guard = tts_arc.blocking_write();
67
68 if let Some(tts) = guard.as_mut() {
69 debug!(
70 text = %text,
71 voice = %voice_style,
72 speed = speed,
73 target_rate = target_rate,
74 "Calling Supertonic TTS synthesis"
75 );
76
77 match tts.synthesize(&text, &language, Some(&voice_style), Some(speed)) {
78 Ok(samples) => {
79 if !samples.is_empty() {
80 let mut samples_i16: Vec<i16> = samples
81 .iter()
82 .map(|&s| (s * 32768.0).max(-32768.0).min(32767.0) as i16)
83 .collect();
84
85 if tts.sample_rate() != target_rate {
87 let mut resampler = Resampler::new(
88 tts.sample_rate() as usize,
89 target_rate as usize,
90 );
91 samples_i16 = resampler.resample(&samples_i16);
92 }
93
94 let mut bytes = Vec::with_capacity(samples_i16.len() * 2);
96 for s in samples_i16 {
97 bytes.extend_from_slice(&s.to_le_bytes());
98 }
99
100 let _ = tx_clone.send((
102 cmd_seq,
103 Ok(SynthesisEvent::AudioChunk(Bytes::from(bytes))),
104 ));
105
106 let _ = tx_clone.send((cmd_seq, Ok(SynthesisEvent::Finished)));
108 } else {
109 warn!("Supertonic produced empty audio");
110 let _ = tx_clone.send((cmd_seq, Ok(SynthesisEvent::Finished)));
111 }
112 }
113 Err(e) => {
114 warn!(error = %e, "Supertonic inference failed");
115 let _ = tx_clone.send((cmd_seq, Err(anyhow!("Synthesis failed: {}", e))));
116 }
117 }
118 } else {
119 warn!("Supertonic TTS not initialized");
120 let _ = tx_clone.send((cmd_seq, Err(anyhow!("TTS not initialized"))));
121 }
122 })
123 .await
124 .map_err(|e| anyhow!("task join error: {}", e))?;
125
126 Ok(())
127 }
128}
129
130#[async_trait]
131impl SynthesisClient for SupertonicTtsClient {
132 fn provider(&self) -> SynthesisType {
133 SynthesisType::Supertonic
134 }
135
136 async fn start(
137 &mut self,
138 ) -> Result<BoxStream<'static, (Option<usize>, Result<SynthesisEvent>)>> {
139 Self::ensure_models_initialized()?;
140
141 let (tx, rx) = mpsc::unbounded_channel();
142 self.tx = Some(tx);
143
144 let models =
146 get_offline_models().ok_or_else(|| anyhow!("offline models not initialized"))?;
147 models.init_supertonic().await?;
148
149 debug!(
150 "SupertonicTtsClient started with voice: {}",
151 self.voice_style
152 );
153
154 Ok(Box::pin(UnboundedReceiverStream::new(rx)))
155 }
156
157 async fn synthesize(
158 &mut self,
159 text: &str,
160 cmd_seq: Option<usize>,
161 _option: Option<SynthesisOption>,
162 ) -> Result<()> {
163 self.synthesize_text(text.to_string(), cmd_seq).await
164 }
165
166 async fn stop(&mut self) -> Result<()> {
167 self.token.cancel();
168 self.tx = None;
169 Ok(())
170 }
171}