1use anyhow::Result;
2use async_trait::async_trait;
3use bytes::Bytes;
4use futures::stream::BoxStream;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use tokio::sync::mpsc;
8
9mod aliyun;
10mod deepgram;
11mod tencent_cloud;
12mod tencent_cloud_basic;
13
14#[cfg(feature = "offline")]
15mod supertonic;
16
17pub use aliyun::AliyunTtsClient;
18pub use deepgram::DeepegramTtsClient;
19pub use tencent_cloud::TencentCloudTtsClient;
20pub use tencent_cloud_basic::TencentCloudTtsBasicClient;
21
22#[cfg(feature = "offline")]
23pub use supertonic::SupertonicTtsClient;
24
25#[derive(Clone, Default)]
26pub struct SynthesisCommand {
27 pub text: String,
28 pub speaker: Option<String>,
29 pub play_id: Option<String>,
30 pub streaming: bool,
31 pub end_of_stream: bool,
32 pub option: SynthesisOption,
33 pub base64: bool,
34}
35pub type SynthesisCommandSender = mpsc::UnboundedSender<SynthesisCommand>;
36pub type SynthesisCommandReceiver = mpsc::UnboundedReceiver<SynthesisCommand>;
37
38#[derive(Debug, Clone, Serialize, Hash, Eq, PartialEq)]
39pub enum SynthesisType {
40 #[serde(rename = "tencent")]
41 TencentCloud,
42 #[serde(rename = "aliyun")]
43 Aliyun,
44 #[serde(rename = "deepgram")]
45 Deepgram,
46 #[cfg(feature = "offline")]
47 #[serde(rename = "supertonic")]
48 Supertonic,
49 #[serde(rename = "other")]
50 Other(String),
51}
52
53impl std::fmt::Display for SynthesisType {
54 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55 match self {
56 SynthesisType::TencentCloud => write!(f, "tencent"),
57 SynthesisType::Aliyun => write!(f, "aliyun"),
58 SynthesisType::Deepgram => write!(f, "deepgram"),
59 #[cfg(feature = "offline")]
60 SynthesisType::Supertonic => write!(f, "supertonic"),
61 SynthesisType::Other(provider) => write!(f, "{}", provider),
62 }
63 }
64}
65
66impl<'de> Deserialize<'de> for SynthesisType {
67 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
68 where
69 D: serde::Deserializer<'de>,
70 {
71 let value = String::deserialize(deserializer)?;
72 match value.as_str() {
73 "tencent" => Ok(SynthesisType::TencentCloud),
74 "aliyun" => Ok(SynthesisType::Aliyun),
75 "deepgram" => Ok(SynthesisType::Deepgram),
76 #[cfg(feature = "offline")]
77 "supertonic" => Ok(SynthesisType::Supertonic),
78 _ => Ok(SynthesisType::Other(value)),
79 }
80 }
81}
82
83#[cfg(test)]
84mod tests;
85#[derive(Debug, Clone, Deserialize, Serialize)]
86#[serde(rename_all = "camelCase")]
87#[serde(default)]
88pub struct SynthesisOption {
89 pub samplerate: Option<i32>,
90 pub provider: Option<SynthesisType>,
91 pub speed: Option<f32>,
92 pub app_id: Option<String>,
93 pub secret_id: Option<String>,
94 pub secret_key: Option<String>,
95 pub volume: Option<i32>,
96 pub speaker: Option<String>,
97 pub codec: Option<String>,
98 pub subtitle: Option<bool>,
99 pub model: Option<String>,
100 pub language: Option<String>,
101 pub emotion: Option<String>,
104 pub endpoint: Option<String>,
105 pub extra: Option<HashMap<String, String>>,
106 pub max_concurrent_tasks: Option<usize>,
107}
108
109impl SynthesisOption {
110 pub fn merge_with(&self, option: Option<SynthesisOption>) -> Self {
111 if let Some(other) = option {
112 Self {
113 samplerate: other.samplerate.or(self.samplerate),
114 provider: other.provider.or(self.provider.clone()),
115 speed: other.speed.or(self.speed),
116 app_id: other.app_id.or(self.app_id.clone()),
117 secret_id: other.secret_id.or(self.secret_id.clone()),
118 secret_key: other.secret_key.or(self.secret_key.clone()),
119 volume: other.volume.or(self.volume),
120 speaker: other.speaker.or(self.speaker.clone()),
121 codec: other.codec.or(self.codec.clone()),
122 subtitle: other.subtitle.or(self.subtitle),
123 model: other.model.or(self.model.clone()),
124 language: other.language.or(self.language.clone()),
125 emotion: other.emotion.or(self.emotion.clone()),
126 endpoint: other.endpoint.or(self.endpoint.clone()),
127 extra: other.extra.or(self.extra.clone()),
128 max_concurrent_tasks: other.max_concurrent_tasks.or(self.max_concurrent_tasks),
129 }
130 } else {
131 self.clone()
132 }
133 }
134}
135
136#[derive(Debug)]
137pub enum SynthesisEvent {
138 AudioChunk(Bytes),
140 Subtitles(Vec<Subtitle>),
142 Finished,
143}
144
/// A piece of text together with a time range and an index range.
///
/// NOTE(review): units are not established in this file — the times are
/// presumably milliseconds and the indices presumably offsets into the
/// synthesized text; confirm against the provider clients.
#[derive(Debug, Clone)]
pub struct Subtitle {
    /// Text covered by this entry.
    pub text: String,
    /// Start of the time range.
    pub begin_time: u32,
    /// End of the time range.
    pub end_time: u32,
    /// Start of the index range.
    pub begin_index: u32,
    /// End of the index range.
    pub end_index: u32,
}

impl Subtitle {
    /// Builds a `Subtitle` from its five components, stored unchanged.
    pub fn new(
        text: String,
        begin_time: u32,
        end_time: u32,
        begin_index: u32,
        end_index: u32,
    ) -> Self {
        Subtitle {
            text,
            begin_time,
            end_time,
            begin_index,
            end_index,
        }
    }
}
171
/// Converts an audio byte count into a duration in milliseconds.
///
/// The result is `500 * bytes / sample_rate`, truncated toward zero. In other
/// words the formula counts two bytes per sample frame (presumably 16-bit
/// mono PCM — confirm against callers).
///
/// The computation uses integer arithmetic so that large buffers are exact;
/// a single-precision float can no longer represent every byte count above
/// 2^24. Results larger than `u32::MAX` saturate to `u32::MAX`.
///
/// A `sample_rate` of zero keeps the results the floating-point formula
/// produced: `0` for an empty buffer and `u32::MAX` otherwise.
pub fn bytes_size_to_duration(bytes: usize, sample_rate: u32) -> u32 {
    if sample_rate == 0 {
        return if bytes == 0 { 0 } else { u32::MAX };
    }
    // Widening cast only: u128 holds `500 * usize::MAX` without overflow.
    let millis = 500u128 * (bytes as u128) / u128::from(sample_rate);
    // Clamped first, so the narrowing cast below cannot truncate.
    millis.min(u128::from(u32::MAX)) as u32
}
176
177#[async_trait]
178pub trait SynthesisClient: Send {
179 fn provider(&self) -> SynthesisType;
181
182 async fn start(
185 &mut self,
186 ) -> Result<BoxStream<'static, (Option<usize>, Result<SynthesisEvent>)>>;
187
188 async fn synthesize(
192 &mut self,
193 text: &str,
194 cmd_seq: Option<usize>,
195 option: Option<SynthesisOption>,
196 ) -> Result<()>;
197
198 async fn stop(&mut self) -> Result<()>;
199}
200
201impl Default for SynthesisOption {
202 fn default() -> Self {
203 Self {
204 samplerate: Some(16000),
205 provider: None,
206 speed: Some(1.0),
207 app_id: None,
208 secret_id: None,
209 secret_key: None,
210 volume: Some(5), speaker: None,
212 codec: Some("pcm".to_string()),
213 subtitle: None,
214 model: None,
215 language: None,
216 emotion: None,
217 endpoint: None,
218 extra: None,
219 max_concurrent_tasks: None,
220 }
221 }
222}
223
224impl SynthesisOption {
225 pub fn check_default(&mut self) {
226 if let Some(provider) = &self.provider {
227 match provider.to_string().as_str() {
228 "tencent" | "tencent_basic" => {
229 if self.app_id.is_none() {
230 self.app_id = std::env::var("TENCENT_APPID").ok();
231 }
232 if self.secret_id.is_none() {
233 self.secret_id = std::env::var("TENCENT_SECRET_ID").ok();
234 }
235 if self.secret_key.is_none() {
236 self.secret_key = std::env::var("TENCENT_SECRET_KEY").ok();
237 }
238 }
239 "voiceapi" => {
240 if self.endpoint.is_none() {
242 self.endpoint = std::env::var("VOICEAPI_ENDPOINT")
243 .ok()
244 .or_else(|| Some("http://localhost:8000".to_string()));
245 }
246 if self.speaker.is_none() {
248 self.speaker = std::env::var("VOICEAPI_SPEAKER_ID")
249 .ok()
250 .or_else(|| Some("0".to_string()));
251 }
252 }
253 "aliyun" => {
254 if self.secret_key.is_none() {
255 self.secret_key = std::env::var("DASHSCOPE_API_KEY").ok();
256 }
257 }
258 "deepgram" => {
259 if self.secret_key.is_none() {
260 self.secret_key = std::env::var("DEEPGRAM_API_KEY").ok();
261 }
262 }
263 _ => {}
264 }
265 }
266 }
267}