1use anyhow::Result;
2use async_trait::async_trait;
3use bytes::Bytes;
4use futures::stream::BoxStream;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use tokio::sync::mpsc;
8mod aliyun;
9mod deepgram;
10mod tencent_cloud;
11mod tencent_cloud_basic;
12mod voiceapi;
13pub use aliyun::AliyunTtsClient;
14pub use deepgram::DeepegramTtsClient;
15pub use tencent_cloud::TencentCloudTtsClient;
16pub use tencent_cloud_basic::TencentCloudTtsBasicClient;
17pub use voiceapi::VoiceApiTtsClient;
18
/// A single text-to-speech request, sent through [`SynthesisCommandSender`]
/// and received through [`SynthesisCommandReceiver`].
///
/// NOTE(review): the per-field notes below are inferred from field names and
/// types only; the consumers of this struct are not visible in this file, so
/// confirm them against the code that reads the command.
#[derive(Clone, Default)]
pub struct SynthesisCommand {
    /// Text payload of the command.
    pub text: String,
    /// Optional speaker/voice for this command (presumably takes precedence
    /// over `option.speaker` — TODO confirm).
    pub speaker: Option<String>,
    /// Optional identifier of the playback this command belongs to.
    pub play_id: Option<String>,
    /// Streaming flag (presumably: more commands for the same utterance will
    /// follow — TODO confirm).
    pub streaming: bool,
    /// End-of-stream flag (presumably marks the last command of a streaming
    /// sequence — TODO confirm).
    pub end_of_stream: bool,
    /// Synthesis settings attached to this command.
    pub option: SynthesisOption,
    /// Base64 flag (presumably `text` carries base64-encoded data instead of
    /// plain text — TODO confirm).
    pub base64: bool,
}
/// Sending half of an unbounded Tokio channel carrying [`SynthesisCommand`]s.
pub type SynthesisCommandSender = mpsc::UnboundedSender<SynthesisCommand>;
/// Receiving half of an unbounded Tokio channel carrying [`SynthesisCommand`]s.
pub type SynthesisCommandReceiver = mpsc::UnboundedReceiver<SynthesisCommand>;
// Re-exported from the Tencent Cloud module so callers can use it without
// naming that module.
pub use self::tencent_cloud::strip_emoji_chars;
32
33#[derive(Debug, Clone, Serialize, Hash, Eq, PartialEq)]
34pub enum SynthesisType {
35 #[serde(rename = "tencent")]
36 TencentCloud,
37 #[serde(rename = "voiceapi")]
38 VoiceApi,
39 #[serde(rename = "aliyun")]
40 Aliyun,
41 #[serde(rename = "deepgram")]
42 Deepgram,
43 #[serde(rename = "other")]
44 Other(String),
45}
46
47impl std::fmt::Display for SynthesisType {
48 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49 match self {
50 SynthesisType::TencentCloud => write!(f, "tencent"),
51 SynthesisType::VoiceApi => write!(f, "voiceapi"),
52 SynthesisType::Aliyun => write!(f, "aliyun"),
53 SynthesisType::Deepgram => write!(f, "deepgram"),
54 SynthesisType::Other(provider) => write!(f, "{}", provider),
55 }
56 }
57}
58
59impl<'de> Deserialize<'de> for SynthesisType {
60 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
61 where
62 D: serde::Deserializer<'de>,
63 {
64 let value = String::deserialize(deserializer)?;
65 match value.as_str() {
66 "tencent" => Ok(SynthesisType::TencentCloud),
67 "voiceapi" => Ok(SynthesisType::VoiceApi),
68 "aliyun" => Ok(SynthesisType::Aliyun),
69 "deepgram" => Ok(SynthesisType::Deepgram),
70 _ => Ok(SynthesisType::Other(value)),
71 }
72 }
73}
74
75#[cfg(test)]
76mod tests;
/// Settings for a synthesis request.
///
/// Serialized with camelCase keys (e.g. `appId`, `secretKey`,
/// `maxConcurrentTasks`). Because of `#[serde(default)]`, any key missing
/// from the input takes its value from this type's `Default` impl.
///
/// NOTE(review): value ranges and units are not established anywhere in this
/// file; the hedged notes below should be confirmed against the provider
/// clients.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
#[serde(default)]
pub struct SynthesisOption {
    /// Output sample rate (presumably in Hz); the default is `16000`.
    pub samplerate: Option<i32>,
    /// Backend to use; `check_default` keys its environment lookups on it.
    pub provider: Option<SynthesisType>,
    /// Speech speed; the default is `1.0`.
    pub speed: Option<f32>,
    /// Application id credential (filled from `TENCENT_APPID` for Tencent
    /// providers by `check_default`).
    pub app_id: Option<String>,
    /// Secret id credential (filled from `TENCENT_SECRET_ID` for Tencent
    /// providers by `check_default`).
    pub secret_id: Option<String>,
    /// Secret/API key credential (filled from a provider-specific
    /// environment variable by `check_default`).
    pub secret_key: Option<String>,
    /// Output volume; the default is `5` (scale is provider-defined —
    /// TODO confirm).
    pub volume: Option<i32>,
    /// Speaker/voice identifier.
    pub speaker: Option<String>,
    /// Audio codec name; the default is `"pcm"`.
    pub codec: Option<String>,
    /// Subtitle flag (presumably requests subtitle events alongside the
    /// audio — TODO confirm).
    pub subtitle: Option<bool>,
    /// Emotion hint for the voice (provider-specific — TODO confirm).
    pub emotion: Option<String>,
    /// Service endpoint URL override.
    pub endpoint: Option<String>,
    /// Free-form string key/value pairs (presumably provider-specific
    /// parameters — TODO confirm).
    pub extra: Option<HashMap<String, String>>,
    /// Concurrency limit (presumably the maximum number of simultaneous
    /// synthesis tasks — TODO confirm).
    pub max_concurrent_tasks: Option<usize>,
}
98
99impl SynthesisOption {
100 pub fn merge_with(&self, option: Option<SynthesisOption>) -> Self {
101 if let Some(other) = option {
102 Self {
103 samplerate: other.samplerate.or(self.samplerate),
104 provider: other.provider.or(self.provider.clone()),
105 speed: other.speed.or(self.speed),
106 app_id: other.app_id.or(self.app_id.clone()),
107 secret_id: other.secret_id.or(self.secret_id.clone()),
108 secret_key: other.secret_key.or(self.secret_key.clone()),
109 volume: other.volume.or(self.volume),
110 speaker: other.speaker.or(self.speaker.clone()),
111 codec: other.codec.or(self.codec.clone()),
112 subtitle: other.subtitle.or(self.subtitle),
113 emotion: other.emotion.or(self.emotion.clone()),
114 endpoint: other.endpoint.or(self.endpoint.clone()),
115 extra: other.extra.or(self.extra.clone()),
116 max_concurrent_tasks: other.max_concurrent_tasks.or(self.max_concurrent_tasks),
117 }
118 } else {
119 self.clone()
120 }
121 }
122}
123
/// Events produced by a synthesis client; they are the payload of the stream
/// returned by `SynthesisClient::start`.
#[derive(Debug)]
pub enum SynthesisEvent {
    /// A chunk of audio bytes (presumably encoded per the requested codec —
    /// TODO confirm).
    AudioChunk(Bytes),
    /// A batch of subtitle entries.
    Subtitles(Vec<Subtitle>),
    /// Signals completion (presumably of one synthesis command — TODO
    /// confirm against the provider clients).
    Finished,
}
132
/// One subtitle entry: a piece of text with a time span and an index span.
///
/// NOTE(review): units are not stated in this file — the times are
/// presumably milliseconds and the indices presumably positions within the
/// synthesized text; confirm against the provider clients.
#[derive(Debug, Clone)]
pub struct Subtitle {
    /// Text covered by this entry.
    pub text: String,
    /// Start of the time span.
    pub begin_time: u32,
    /// End of the time span.
    pub end_time: u32,
    /// Start of the index span.
    pub begin_index: u32,
    /// End of the index span.
    pub end_index: u32,
}
141
142impl Subtitle {
143 pub fn new(
144 text: String,
145 begin_time: u32,
146 end_time: u32,
147 begin_index: u32,
148 end_index: u32,
149 ) -> Self {
150 Self {
151 text,
152 begin_time,
153 end_time,
154 begin_index,
155 end_index,
156 }
157 }
158}
159
/// Converts a byte count into a duration, computed as
/// `500 * bytes / sample_rate` and truncated towards zero.
///
/// The factor 500 is `1000 / 2`, so the result is in milliseconds when each
/// sample occupies two bytes (e.g. 16-bit mono PCM).
///
/// The computation uses exact integer arithmetic, so large byte counts do
/// not suffer from floating-point rounding. Edge cases:
/// - `sample_rate == 0` yields `0` when `bytes == 0` and `u32::MAX` otherwise;
/// - results that do not fit in `u32` saturate to `u32::MAX`.
pub fn bytes_size_to_duration(bytes: usize, sample_rate: u32) -> u32 {
    if sample_rate == 0 {
        // No meaningful rate: avoid the division by zero and report either
        // "nothing" or "unbounded".
        return if bytes == 0 { 0 } else { u32::MAX };
    }

    // u128 cannot overflow here: usize::MAX * 500 is far below u128::MAX.
    let millis = (bytes as u128) * 500 / u128::from(sample_rate);
    millis.min(u128::from(u32::MAX)) as u32
}
164
/// Common interface of the text-to-speech backend clients.
///
/// NOTE(review): the call-order contract between `start`, `synthesize` and
/// `stop` is not visible in this file; the method notes below describe only
/// what the signatures show and hedge the rest.
#[async_trait]
pub trait SynthesisClient: Send {
    /// Returns the [`SynthesisType`] identifying this backend.
    fn provider(&self) -> SynthesisType;

    /// Starts the client and returns the stream of its output.
    ///
    /// Each stream item pairs an optional `usize` with either a
    /// [`SynthesisEvent`] or an error. The `usize` presumably echoes the
    /// `cmd_seq` given to `synthesize` — confirm against the implementations.
    async fn start(
        &mut self,
    ) -> Result<BoxStream<'static, (Option<usize>, Result<SynthesisEvent>)>>;

    /// Submits `text` for synthesis.
    ///
    /// `cmd_seq` is an optional sequence number for this command and `option`
    /// optionally supplies per-request settings (presumably layered over the
    /// client's own settings — TODO confirm). The return value carries no
    /// audio; results are presumably delivered on the stream returned by
    /// `start`.
    async fn synthesize(
        &mut self,
        text: &str,
        cmd_seq: Option<usize>,
        option: Option<SynthesisOption>,
    ) -> Result<()>;

    /// Stops the client.
    async fn stop(&mut self) -> Result<()>;
}
188
189impl Default for SynthesisOption {
190 fn default() -> Self {
191 Self {
192 samplerate: Some(16000),
193 provider: None,
194 speed: Some(1.0),
195 app_id: None,
196 secret_id: None,
197 secret_key: None,
198 volume: Some(5), speaker: None,
200 codec: Some("pcm".to_string()),
201 subtitle: None,
202 emotion: None,
203 endpoint: None,
204 extra: None,
205 max_concurrent_tasks: None,
206 }
207 }
208}
209
210impl SynthesisOption {
211 pub fn check_default(&mut self) {
212 if let Some(provider) = &self.provider {
213 match provider.to_string().as_str() {
214 "tencent" | "tencent_basic" => {
215 if self.app_id.is_none() {
216 self.app_id = std::env::var("TENCENT_APPID").ok();
217 }
218 if self.secret_id.is_none() {
219 self.secret_id = std::env::var("TENCENT_SECRET_ID").ok();
220 }
221 if self.secret_key.is_none() {
222 self.secret_key = std::env::var("TENCENT_SECRET_KEY").ok();
223 }
224 }
225 "voiceapi" => {
226 if self.endpoint.is_none() {
228 self.endpoint = std::env::var("VOICEAPI_ENDPOINT")
229 .ok()
230 .or_else(|| Some("http://localhost:8000".to_string()));
231 }
232 if self.speaker.is_none() {
234 self.speaker = std::env::var("VOICEAPI_SPEAKER_ID")
235 .ok()
236 .or_else(|| Some("0".to_string()));
237 }
238 }
239 "aliyun" => {
240 if self.secret_key.is_none() {
241 self.secret_key = std::env::var("DASHSCOPE_API_KEY").ok();
242 }
243 }
244 "deepgram" => {
245 if self.secret_key.is_none() {
246 self.secret_key = std::env::var("DEEPGRAM_API_KEY").ok();
247 }
248 }
249 _ => {}
250 }
251 }
252 }
253}