1use anyhow::Result;
2use async_trait::async_trait;
3use bytes::Bytes;
4use futures::stream::BoxStream;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use tokio::sync::mpsc;
8mod aliyun;
9mod deepgram;
10mod tencent_cloud;
11mod tencent_cloud_basic;
12mod voiceapi;
13pub use aliyun::AliyunTtsClient;
14pub use deepgram::DeepegramTtsClient;
15pub use tencent_cloud::TencentCloudTtsClient;
16pub use tencent_cloud_basic::TencentCloudTtsBasicClient;
17pub use voiceapi::VoiceApiTtsClient;
18
19#[derive(Clone, Default)]
20pub struct SynthesisCommand {
21 pub text: String,
22 pub speaker: Option<String>,
23 pub play_id: Option<String>,
24 pub streaming: bool,
25 pub end_of_stream: bool,
26 pub option: SynthesisOption,
27 pub base64: bool,
28}
29pub type SynthesisCommandSender = mpsc::UnboundedSender<SynthesisCommand>;
30pub type SynthesisCommandReceiver = mpsc::UnboundedReceiver<SynthesisCommand>;
31
32#[derive(Debug, Clone, Serialize, Hash, Eq, PartialEq)]
33pub enum SynthesisType {
34 #[serde(rename = "tencent")]
35 TencentCloud,
36 #[serde(rename = "voiceapi")]
37 VoiceApi,
38 #[serde(rename = "aliyun")]
39 Aliyun,
40 #[serde(rename = "deepgram")]
41 Deepgram,
42 #[serde(rename = "other")]
43 Other(String),
44}
45
46impl std::fmt::Display for SynthesisType {
47 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48 match self {
49 SynthesisType::TencentCloud => write!(f, "tencent"),
50 SynthesisType::VoiceApi => write!(f, "voiceapi"),
51 SynthesisType::Aliyun => write!(f, "aliyun"),
52 SynthesisType::Deepgram => write!(f, "deepgram"),
53 SynthesisType::Other(provider) => write!(f, "{}", provider),
54 }
55 }
56}
57
58impl<'de> Deserialize<'de> for SynthesisType {
59 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
60 where
61 D: serde::Deserializer<'de>,
62 {
63 let value = String::deserialize(deserializer)?;
64 match value.as_str() {
65 "tencent" => Ok(SynthesisType::TencentCloud),
66 "voiceapi" => Ok(SynthesisType::VoiceApi),
67 "aliyun" => Ok(SynthesisType::Aliyun),
68 "deepgram" => Ok(SynthesisType::Deepgram),
69 _ => Ok(SynthesisType::Other(value)),
70 }
71 }
72}
73
74#[cfg(test)]
75mod tests;
76#[derive(Debug, Clone, Deserialize, Serialize)]
77#[serde(rename_all = "camelCase")]
78#[serde(default)]
79pub struct SynthesisOption {
80 pub samplerate: Option<i32>,
81 pub provider: Option<SynthesisType>,
82 pub speed: Option<f32>,
83 pub app_id: Option<String>,
84 pub secret_id: Option<String>,
85 pub secret_key: Option<String>,
86 pub volume: Option<i32>,
87 pub speaker: Option<String>,
88 pub codec: Option<String>,
89 pub subtitle: Option<bool>,
90 pub model: Option<String>,
91 pub emotion: Option<String>,
94 pub endpoint: Option<String>,
95 pub extra: Option<HashMap<String, String>>,
96 pub max_concurrent_tasks: Option<usize>,
97}
98
99impl SynthesisOption {
100 pub fn merge_with(&self, option: Option<SynthesisOption>) -> Self {
101 if let Some(other) = option {
102 Self {
103 samplerate: other.samplerate.or(self.samplerate),
104 provider: other.provider.or(self.provider.clone()),
105 speed: other.speed.or(self.speed),
106 app_id: other.app_id.or(self.app_id.clone()),
107 secret_id: other.secret_id.or(self.secret_id.clone()),
108 secret_key: other.secret_key.or(self.secret_key.clone()),
109 volume: other.volume.or(self.volume),
110 speaker: other.speaker.or(self.speaker.clone()),
111 codec: other.codec.or(self.codec.clone()),
112 subtitle: other.subtitle.or(self.subtitle),
113 model: other.model.or(self.model.clone()),
114 emotion: other.emotion.or(self.emotion.clone()),
115 endpoint: other.endpoint.or(self.endpoint.clone()),
116 extra: other.extra.or(self.extra.clone()),
117 max_concurrent_tasks: other.max_concurrent_tasks.or(self.max_concurrent_tasks),
118 }
119 } else {
120 self.clone()
121 }
122 }
123}
124
125#[derive(Debug)]
126pub enum SynthesisEvent {
127 AudioChunk(Bytes),
129 Subtitles(Vec<Subtitle>),
131 Finished,
132}
133
/// A subtitle entry: a piece of text together with a time span and an index
/// span.
///
/// NOTE(review): the units of the time fields and the meaning of the index
/// fields (presumably positions within the synthesized text) are not visible
/// here; confirm with the producers.
#[derive(Debug, Clone)]
pub struct Subtitle {
    /// The subtitle text.
    pub text: String,
    /// Start of the time span.
    pub begin_time: u32,
    /// End of the time span.
    pub end_time: u32,
    /// Start of the index span.
    pub begin_index: u32,
    /// End of the index span.
    pub end_index: u32,
}
142
143impl Subtitle {
144 pub fn new(
145 text: String,
146 begin_time: u32,
147 end_time: u32,
148 begin_index: u32,
149 end_index: u32,
150 ) -> Self {
151 Self {
152 text,
153 begin_time,
154 end_time,
155 begin_index,
156 end_index,
157 }
158 }
159}
160
/// Converts a byte count into a duration, computed as
/// `bytes * 500 / sample_rate` and truncated toward zero.
///
/// The factor 500 equals 1000 / 2, so the result is presumably milliseconds
/// of 16-bit mono PCM audio; confirm against the callers.
///
/// The computation uses exact integer arithmetic. A floating-point version
/// would start rounding once `bytes` or `500 * bytes` no longer fits the
/// 24-bit `f32` mantissa.
///
/// Edge cases:
/// * results larger than `u32::MAX` saturate to `u32::MAX`;
/// * a `sample_rate` of zero yields `0` for `bytes == 0` and `u32::MAX`
///   otherwise.
pub fn bytes_size_to_duration(bytes: usize, sample_rate: u32) -> u32 {
    if sample_rate == 0 {
        return if bytes == 0 { 0 } else { u32::MAX };
    }
    // u128 cannot overflow here: usize::MAX * 500 is far below u128::MAX.
    let duration = (bytes as u128) * 500 / u128::from(sample_rate);
    duration.min(u128::from(u32::MAX)) as u32
}
165
166#[async_trait]
167pub trait SynthesisClient: Send {
168 fn provider(&self) -> SynthesisType;
170
171 async fn start(
174 &mut self,
175 ) -> Result<BoxStream<'static, (Option<usize>, Result<SynthesisEvent>)>>;
176
177 async fn synthesize(
181 &mut self,
182 text: &str,
183 cmd_seq: Option<usize>,
184 option: Option<SynthesisOption>,
185 ) -> Result<()>;
186
187 async fn stop(&mut self) -> Result<()>;
188}
189
190impl Default for SynthesisOption {
191 fn default() -> Self {
192 Self {
193 samplerate: Some(16000),
194 provider: None,
195 speed: Some(1.0),
196 app_id: None,
197 secret_id: None,
198 secret_key: None,
199 volume: Some(5), speaker: None,
201 codec: Some("pcm".to_string()),
202 subtitle: None,
203 model: None,
204 emotion: None,
205 endpoint: None,
206 extra: None,
207 max_concurrent_tasks: None,
208 }
209 }
210}
211
212impl SynthesisOption {
213 pub fn check_default(&mut self) {
214 if let Some(provider) = &self.provider {
215 match provider.to_string().as_str() {
216 "tencent" | "tencent_basic" => {
217 if self.app_id.is_none() {
218 self.app_id = std::env::var("TENCENT_APPID").ok();
219 }
220 if self.secret_id.is_none() {
221 self.secret_id = std::env::var("TENCENT_SECRET_ID").ok();
222 }
223 if self.secret_key.is_none() {
224 self.secret_key = std::env::var("TENCENT_SECRET_KEY").ok();
225 }
226 }
227 "voiceapi" => {
228 if self.endpoint.is_none() {
230 self.endpoint = std::env::var("VOICEAPI_ENDPOINT")
231 .ok()
232 .or_else(|| Some("http://localhost:8000".to_string()));
233 }
234 if self.speaker.is_none() {
236 self.speaker = std::env::var("VOICEAPI_SPEAKER_ID")
237 .ok()
238 .or_else(|| Some("0".to_string()));
239 }
240 }
241 "aliyun" => {
242 if self.secret_key.is_none() {
243 self.secret_key = std::env::var("DASHSCOPE_API_KEY").ok();
244 }
245 }
246 "deepgram" => {
247 if self.secret_key.is_none() {
248 self.secret_key = std::env::var("DEEPGRAM_API_KEY").ok();
249 }
250 }
251 _ => {}
252 }
253 }
254 }
255}