1use anyhow::Result;
2use async_trait::async_trait;
3use bytes::Bytes;
4use futures::stream::BoxStream;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use tokio::sync::mpsc;
8
9mod aliyun;
10mod deepgram;
11mod tencent_cloud;
12mod tencent_cloud_basic;
13
14#[cfg(feature = "offline")]
15mod supertonic;
16
17pub use aliyun::AliyunTtsClient;
18pub use deepgram::DeepegramTtsClient;
19pub use tencent_cloud::TencentCloudTtsClient;
20pub use tencent_cloud_basic::TencentCloudTtsBasicClient;
21
22#[cfg(feature = "offline")]
23pub use supertonic::SupertonicTtsClient;
24
25#[derive(Clone, Default)]
26pub struct SynthesisCommand {
27 pub text: String,
28 pub speaker: Option<String>,
29 pub play_id: Option<String>,
30 pub streaming: bool,
31 pub end_of_stream: bool,
32 pub option: SynthesisOption,
33 pub base64: bool,
34 pub cache_key: Option<String>,
35}
36pub type SynthesisCommandSender = mpsc::UnboundedSender<SynthesisCommand>;
37pub type SynthesisCommandReceiver = mpsc::UnboundedReceiver<SynthesisCommand>;
38
39#[derive(Debug, Clone, Serialize, Hash, Eq, PartialEq)]
40pub enum SynthesisType {
41 #[serde(rename = "tencent")]
42 TencentCloud,
43 #[serde(rename = "aliyun")]
44 Aliyun,
45 #[serde(rename = "deepgram")]
46 Deepgram,
47 #[cfg(feature = "offline")]
48 #[serde(rename = "supertonic")]
49 Supertonic,
50 #[serde(rename = "other")]
51 Other(String),
52}
53
54impl std::fmt::Display for SynthesisType {
55 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56 match self {
57 SynthesisType::TencentCloud => write!(f, "tencent"),
58 SynthesisType::Aliyun => write!(f, "aliyun"),
59 SynthesisType::Deepgram => write!(f, "deepgram"),
60 #[cfg(feature = "offline")]
61 SynthesisType::Supertonic => write!(f, "supertonic"),
62 SynthesisType::Other(provider) => write!(f, "{}", provider),
63 }
64 }
65}
66
67impl<'de> Deserialize<'de> for SynthesisType {
68 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
69 where
70 D: serde::Deserializer<'de>,
71 {
72 let value = String::deserialize(deserializer)?;
73 match value.as_str() {
74 "tencent" => Ok(SynthesisType::TencentCloud),
75 "aliyun" => Ok(SynthesisType::Aliyun),
76 "deepgram" => Ok(SynthesisType::Deepgram),
77 #[cfg(feature = "offline")]
78 "supertonic" => Ok(SynthesisType::Supertonic),
79 _ => Ok(SynthesisType::Other(value)),
80 }
81 }
82}
83
84#[cfg(test)]
85mod tests;
86#[derive(Debug, Clone, Deserialize, Serialize)]
87#[serde(rename_all = "camelCase")]
88#[serde(default)]
89pub struct SynthesisOption {
90 pub samplerate: Option<i32>,
91 pub provider: Option<SynthesisType>,
92 pub speed: Option<f32>,
93 pub app_id: Option<String>,
94 pub secret_id: Option<String>,
95 pub secret_key: Option<String>,
96 pub volume: Option<i32>,
97 pub speaker: Option<String>,
98 pub codec: Option<String>,
99 pub subtitle: Option<bool>,
100 pub model: Option<String>,
101 pub language: Option<String>,
102 pub emotion: Option<String>,
105 pub endpoint: Option<String>,
106 pub extra: Option<HashMap<String, String>>,
107 pub max_concurrent_tasks: Option<usize>,
108 pub session_id: Option<String>,
109}
110
111impl SynthesisOption {
112 pub fn merge_with(&self, option: Option<SynthesisOption>) -> Self {
113 if let Some(other) = option {
114 Self {
115 samplerate: other.samplerate.or(self.samplerate),
116 provider: other.provider.or(self.provider.clone()),
117 speed: other.speed.or(self.speed),
118 app_id: other.app_id.or(self.app_id.clone()),
119 secret_id: other.secret_id.or(self.secret_id.clone()),
120 secret_key: other.secret_key.or(self.secret_key.clone()),
121 volume: other.volume.or(self.volume),
122 speaker: other.speaker.or(self.speaker.clone()),
123 codec: other.codec.or(self.codec.clone()),
124 subtitle: other.subtitle.or(self.subtitle),
125 model: other.model.or(self.model.clone()),
126 language: other.language.or(self.language.clone()),
127 emotion: other.emotion.or(self.emotion.clone()),
128 endpoint: other.endpoint.or(self.endpoint.clone()),
129 extra: other.extra.or(self.extra.clone()),
130 max_concurrent_tasks: other.max_concurrent_tasks.or(self.max_concurrent_tasks),
131 session_id: other.session_id.or(self.session_id.clone()),
132 }
133 } else {
134 self.clone()
135 }
136 }
137}
138
139#[derive(Debug)]
140pub enum SynthesisEvent {
141 AudioChunk(Bytes),
143 Subtitles(Vec<Subtitle>),
145 Finished,
146}
147
/// A piece of subtitle text together with its time span and index span.
///
/// NOTE(review): the units of the time fields (presumably milliseconds) and
/// the meaning of the index fields (presumably offsets into the synthesized
/// text) are set by the provider clients and are not visible in this module.
#[derive(Debug, Clone)]
pub struct Subtitle {
    /// The subtitle text.
    pub text: String,
    /// Start of the time span.
    pub begin_time: u32,
    /// End of the time span.
    pub end_time: u32,
    /// Start of the index span.
    pub begin_index: u32,
    /// End of the index span.
    pub end_index: u32,
}

impl Subtitle {
    /// Builds a [`Subtitle`] from its five components, stored unchanged.
    pub fn new(
        text: String,
        begin_time: u32,
        end_time: u32,
        begin_index: u32,
        end_index: u32,
    ) -> Self {
        Subtitle {
            text: text,
            begin_time: begin_time,
            end_time: end_time,
            begin_index: begin_index,
            end_index: end_index,
        }
    }
}
174
/// Converts an audio buffer size into a duration.
///
/// Computes `500 * bytes / sample_rate`, rounded down. The factor 500 equals
/// `1000 / 2`, so the result is in milliseconds when the buffer holds two
/// bytes per sample frame (e.g. 16-bit mono PCM).
///
/// The computation uses exact integer arithmetic. `f32` cannot represent
/// byte counts above 2^24 exactly, which made the truncated result drift by
/// a millisecond for long buffers.
///
/// Edge cases:
/// * `sample_rate == 0` yields `0`, since no meaningful duration exists.
/// * Results that do not fit in `u32` saturate to `u32::MAX`.
pub fn bytes_size_to_duration(bytes: usize, sample_rate: u32) -> u32 {
    if sample_rate == 0 {
        return 0;
    }
    // u128 cannot overflow here: usize::MAX * 500 is far below u128::MAX.
    let millis = (bytes as u128 * 500) / u128::from(sample_rate);
    millis.min(u128::from(u32::MAX)) as u32
}
179
180#[async_trait]
181pub trait SynthesisClient: Send {
182 fn provider(&self) -> SynthesisType;
184
185 async fn start(
188 &mut self,
189 ) -> Result<BoxStream<'static, (Option<usize>, Result<SynthesisEvent>)>>;
190
191 async fn synthesize(
195 &mut self,
196 text: &str,
197 cmd_seq: Option<usize>,
198 option: Option<SynthesisOption>,
199 ) -> Result<()>;
200
201 async fn stop(&mut self) -> Result<()>;
202}
203
204impl Default for SynthesisOption {
205 fn default() -> Self {
206 Self {
207 samplerate: Some(16000),
208 provider: None,
209 speed: Some(1.0),
210 app_id: None,
211 secret_id: None,
212 secret_key: None,
213 volume: Some(5), speaker: None,
215 codec: Some("pcm".to_string()),
216 subtitle: None,
217 model: None,
218 language: None,
219 emotion: None,
220 endpoint: None,
221 extra: None,
222 max_concurrent_tasks: None,
223 session_id: None,
224 }
225 }
226}
227
228impl SynthesisOption {
229 pub fn check_default(&mut self) {
230 if let Some(provider) = &self.provider {
231 match provider.to_string().as_str() {
232 "tencent" | "tencent_basic" => {
233 if self.app_id.is_none() {
234 self.app_id = std::env::var("TENCENT_APPID").ok();
235 }
236 if self.secret_id.is_none() {
237 self.secret_id = std::env::var("TENCENT_SECRET_ID").ok();
238 }
239 if self.secret_key.is_none() {
240 self.secret_key = std::env::var("TENCENT_SECRET_KEY").ok();
241 }
242 }
243 "voiceapi" => {
244 if self.endpoint.is_none() {
246 self.endpoint = std::env::var("VOICEAPI_ENDPOINT")
247 .ok()
248 .or_else(|| Some("http://localhost:8000".to_string()));
249 }
250 if self.speaker.is_none() {
252 self.speaker = std::env::var("VOICEAPI_SPEAKER_ID")
253 .ok()
254 .or_else(|| Some("0".to_string()));
255 }
256 }
257 "aliyun" => {
258 if self.secret_key.is_none() {
259 self.secret_key = std::env::var("DASHSCOPE_API_KEY").ok();
260 }
261 }
262 "deepgram" => {
263 if self.secret_key.is_none() {
264 self.secret_key = std::env::var("DEEPGRAM_API_KEY").ok();
265 }
266 }
267 _ => {}
268 }
269 }
270 }
271}