1use anyhow::Result;
2use async_trait::async_trait;
3use bytes::Bytes;
4use futures::stream::BoxStream;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use tokio::sync::mpsc;
8
9mod aliyun;
10mod deepgram;
11mod msedge;
12mod tencent_cloud;
13mod tencent_cloud_basic;
14
15#[cfg(feature = "offline")]
16mod supertonic;
17
18pub use aliyun::AliyunTtsClient;
19pub use deepgram::DeepegramTtsClient;
20pub use msedge::MsEdgeTtsClient;
21pub use tencent_cloud::TencentCloudTtsClient;
22pub use tencent_cloud_basic::TencentCloudTtsBasicClient;
23
24#[cfg(feature = "offline")]
25pub use supertonic::SupertonicTtsClient;
26
27#[derive(Clone, Default)]
28pub struct SynthesisCommand {
29 pub text: String,
30 pub speaker: Option<String>,
31 pub play_id: Option<String>,
32 pub streaming: bool,
33 pub end_of_stream: bool,
34 pub option: SynthesisOption,
35 pub base64: bool,
36}
/// Sending half of an unbounded Tokio mpsc channel carrying [`SynthesisCommand`] values.
pub type SynthesisCommandSender = mpsc::UnboundedSender<SynthesisCommand>;
/// Receiving half of an unbounded Tokio mpsc channel carrying [`SynthesisCommand`] values.
pub type SynthesisCommandReceiver = mpsc::UnboundedReceiver<SynthesisCommand>;
39
40#[derive(Debug, Clone, Serialize, Hash, Eq, PartialEq)]
41pub enum SynthesisType {
42 #[serde(rename = "tencent")]
43 TencentCloud,
44 #[serde(rename = "aliyun")]
45 Aliyun,
46 #[serde(rename = "deepgram")]
47 Deepgram,
48 #[serde(rename = "msedge")]
49 MsEdge,
50 #[cfg(feature = "offline")]
51 #[serde(rename = "supertonic")]
52 Supertonic,
53 #[serde(rename = "other")]
54 Other(String),
55}
56
57impl std::fmt::Display for SynthesisType {
58 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59 match self {
60 SynthesisType::TencentCloud => write!(f, "tencent"),
61 SynthesisType::Aliyun => write!(f, "aliyun"),
62 SynthesisType::Deepgram => write!(f, "deepgram"),
63 SynthesisType::MsEdge => write!(f, "msedge"),
64 #[cfg(feature = "offline")]
65 SynthesisType::Supertonic => write!(f, "supertonic"),
66 SynthesisType::Other(provider) => write!(f, "{}", provider),
67 }
68 }
69}
70
71impl<'de> Deserialize<'de> for SynthesisType {
72 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
73 where
74 D: serde::Deserializer<'de>,
75 {
76 let value = String::deserialize(deserializer)?;
77 match value.as_str() {
78 "tencent" => Ok(SynthesisType::TencentCloud),
79 "aliyun" => Ok(SynthesisType::Aliyun),
80 "deepgram" => Ok(SynthesisType::Deepgram),
81 "msedge" => Ok(SynthesisType::MsEdge),
82 #[cfg(feature = "offline")]
83 "supertonic" => Ok(SynthesisType::Supertonic),
84 _ => Ok(SynthesisType::Other(value)),
85 }
86 }
87}
88
89#[cfg(test)]
90mod tests;
91#[derive(Debug, Clone, Deserialize, Serialize)]
92#[serde(rename_all = "camelCase")]
93#[serde(default)]
94pub struct SynthesisOption {
95 pub samplerate: Option<i32>,
96 pub provider: Option<SynthesisType>,
97 pub speed: Option<f32>,
98 pub app_id: Option<String>,
99 pub secret_id: Option<String>,
100 pub secret_key: Option<String>,
101 pub volume: Option<i32>,
102 pub speaker: Option<String>,
103 pub codec: Option<String>,
104 pub subtitle: Option<bool>,
105 pub model: Option<String>,
106 pub language: Option<String>,
107 pub emotion: Option<String>,
110 pub endpoint: Option<String>,
111 pub extra: Option<HashMap<String, String>>,
112 pub max_concurrent_tasks: Option<usize>,
113}
114
115impl SynthesisOption {
116 pub fn merge_with(&self, option: Option<SynthesisOption>) -> Self {
117 if let Some(other) = option {
118 Self {
119 samplerate: other.samplerate.or(self.samplerate),
120 provider: other.provider.or(self.provider.clone()),
121 speed: other.speed.or(self.speed),
122 app_id: other.app_id.or(self.app_id.clone()),
123 secret_id: other.secret_id.or(self.secret_id.clone()),
124 secret_key: other.secret_key.or(self.secret_key.clone()),
125 volume: other.volume.or(self.volume),
126 speaker: other.speaker.or(self.speaker.clone()),
127 codec: other.codec.or(self.codec.clone()),
128 subtitle: other.subtitle.or(self.subtitle),
129 model: other.model.or(self.model.clone()),
130 language: other.language.or(self.language.clone()),
131 emotion: other.emotion.or(self.emotion.clone()),
132 endpoint: other.endpoint.or(self.endpoint.clone()),
133 extra: other.extra.or(self.extra.clone()),
134 max_concurrent_tasks: other.max_concurrent_tasks.or(self.max_concurrent_tasks),
135 }
136 } else {
137 self.clone()
138 }
139 }
140}
141
/// Events delivered on the stream returned by `SynthesisClient::start`.
#[derive(Debug)]
pub enum SynthesisEvent {
    /// A chunk of audio bytes.
    /// NOTE(review): the encoding presumably follows the requested codec — confirm per provider.
    AudioChunk(Bytes),
    /// A batch of [`Subtitle`] entries.
    Subtitles(Vec<Subtitle>),
    /// Completion marker.
    /// NOTE(review): presumably signals that synthesis for a command has finished — confirm.
    Finished,
}
150
/// A piece of text together with a time range and an index range.
///
/// NOTE(review): the time unit (presumably milliseconds) and what the indices
/// point into (presumably positions in the synthesized text) are not
/// established in this file — confirm with the provider clients.
#[derive(Clone, Debug)]
pub struct Subtitle {
    /// Text of this subtitle entry.
    pub text: String,
    /// Start of the time range.
    pub begin_time: u32,
    /// End of the time range.
    pub end_time: u32,
    /// Start of the index range.
    pub begin_index: u32,
    /// End of the index range.
    pub end_index: u32,
}

impl Subtitle {
    /// Builds a [`Subtitle`] from its parts; the values are stored as given,
    /// without validation.
    pub fn new(
        text: String,
        begin_time: u32,
        end_time: u32,
        begin_index: u32,
        end_index: u32,
    ) -> Self {
        Subtitle {
            begin_time,
            end_time,
            begin_index,
            end_index,
            text,
        }
    }
}
177
/// Converts a byte count into a duration as `500 * bytes / sample_rate`,
/// computed in `f32` and truncated toward zero.
///
/// NOTE(review): this formula yields milliseconds for 16-bit mono PCM
/// (2 bytes per sample) — confirm that callers only pass such audio.
///
/// A `sample_rate` of `0` does not panic: the float-to-integer cast
/// saturates, giving `u32::MAX` for a non-zero `bytes` and `0` for `0` bytes.
pub fn bytes_size_to_duration(bytes: usize, sample_rate: u32) -> u32 {
    let scaled = 500.0 * bytes as f32;
    let duration = scaled / sample_rate as f32;
    duration as u32
}
182
/// Common interface of the speech-synthesis clients exported by this module.
///
/// Implementors must be `Send`. The async methods are provided through
/// `async_trait`.
#[async_trait]
pub trait SynthesisClient: Send {
    /// Returns the [`SynthesisType`] this client reports for itself.
    fn provider(&self) -> SynthesisType;

    /// Starts the client and returns the stream on which results arrive.
    ///
    /// Each stream item pairs an optional `usize` with either a
    /// [`SynthesisEvent`] or an error.
    /// NOTE(review): the `usize` presumably echoes the `cmd_seq` passed to
    /// `synthesize` — confirm against the implementations.
    async fn start(
        &mut self,
    ) -> Result<BoxStream<'static, (Option<usize>, Result<SynthesisEvent>)>>;

    /// Submits `text` for synthesis, with an optional sequence number and
    /// optional per-call options.
    ///
    /// Returns only `Result<()>`.
    /// NOTE(review): audio and subtitles presumably arrive on the stream
    /// returned by `start`, and `option` presumably overrides the client's
    /// configured options — confirm against the implementations.
    async fn synthesize(
        &mut self,
        text: &str,
        cmd_seq: Option<usize>,
        option: Option<SynthesisOption>,
    ) -> Result<()>;

    /// Stops the client.
    /// NOTE(review): exact shutdown behavior (flushing, closing the stream)
    /// is defined by each implementation.
    async fn stop(&mut self) -> Result<()>;
}
206
207impl Default for SynthesisOption {
208 fn default() -> Self {
209 Self {
210 samplerate: Some(16000),
211 provider: None,
212 speed: Some(1.0),
213 app_id: None,
214 secret_id: None,
215 secret_key: None,
216 volume: Some(5), speaker: None,
218 codec: Some("pcm".to_string()),
219 subtitle: None,
220 model: None,
221 language: None,
222 emotion: None,
223 endpoint: None,
224 extra: None,
225 max_concurrent_tasks: None,
226 }
227 }
228}
229
230impl SynthesisOption {
231 pub fn check_default(&mut self) {
232 if let Some(provider) = &self.provider {
233 match provider.to_string().as_str() {
234 "tencent" | "tencent_basic" => {
235 if self.app_id.is_none() {
236 self.app_id = std::env::var("TENCENT_APPID").ok();
237 }
238 if self.secret_id.is_none() {
239 self.secret_id = std::env::var("TENCENT_SECRET_ID").ok();
240 }
241 if self.secret_key.is_none() {
242 self.secret_key = std::env::var("TENCENT_SECRET_KEY").ok();
243 }
244 }
245 "voiceapi" => {
246 if self.endpoint.is_none() {
248 self.endpoint = std::env::var("VOICEAPI_ENDPOINT")
249 .ok()
250 .or_else(|| Some("http://localhost:8000".to_string()));
251 }
252 if self.speaker.is_none() {
254 self.speaker = std::env::var("VOICEAPI_SPEAKER_ID")
255 .ok()
256 .or_else(|| Some("0".to_string()));
257 }
258 }
259 "aliyun" => {
260 if self.secret_key.is_none() {
261 self.secret_key = std::env::var("DASHSCOPE_API_KEY").ok();
262 }
263 }
264 "deepgram" => {
265 if self.secret_key.is_none() {
266 self.secret_key = std::env::var("DEEPGRAM_API_KEY").ok();
267 }
268 }
269 _ => {}
270 }
271 }
272 }
273}