llm/backends/
elevenlabs.rs1use crate::chat::{ChatMessage, ChatProvider, ChatResponse, Tool};
2use crate::completion::{CompletionProvider, CompletionRequest, CompletionResponse};
3use crate::embedding::EmbeddingProvider;
4#[cfg(feature = "elevenlabs")]
5use crate::error::LLMError;
6use crate::models::ModelsProvider;
7use crate::stt::SpeechToTextProvider;
8use crate::tts::TextToSpeechProvider;
9use crate::LLMProvider;
10use async_trait::async_trait;
11use reqwest::Client;
12use serde::{Deserialize, Serialize};
13use std::time::Duration;
14
/// Client for the ElevenLabs speech API (speech-to-text and text-to-speech).
pub struct ElevenLabs {
    // API key sent in the `xi-api-key` header on every request.
    api_key: String,
    // Model identifier passed as `model_id` in request payloads.
    model_id: String,
    // Base URL of the ElevenLabs API; paths are appended with a leading `/`,
    // so no trailing slash is expected here.
    base_url: String,
    // Optional per-request timeout, in seconds; no timeout is set when `None`.
    timeout_seconds: Option<u64>,
    // Reused HTTP client for all requests.
    client: Client,
    // Optional voice ID for text-to-speech; a hard-coded default is used when `None`.
    voice: Option<String>,
}
33
/// Wire format of a single word entry in an ElevenLabs transcription response.
#[derive(Debug, Deserialize)]
struct ElevenLabsWord {
    // The word (or token) text as returned by the API.
    text: String,
    // Start time of the word (presumably seconds — TODO confirm against the
    // ElevenLabs API docs); defaults to 0.0 when absent from the response.
    #[serde(default)]
    start: f32,
    // End time of the word; defaults to 0.0 when absent from the response.
    #[serde(default)]
    end: f32,
}
46
/// Public, serializable representation of a transcribed word with timing.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Word {
    /// The word (or token) text.
    pub text: String,
    /// Start time of the word (presumably seconds — TODO confirm).
    pub start: f32,
    /// End time of the word (presumably seconds — TODO confirm).
    pub end: f32,
}
57
58#[allow(dead_code)]
60#[derive(Debug, Deserialize)]
61struct ElevenLabsResponse {
62 #[serde(skip_serializing_if = "Option::is_none")]
64 language_code: Option<String>,
65 #[serde(skip_serializing_if = "Option::is_none")]
67 language_probability: Option<f32>,
68 #[serde(skip_serializing_if = "Option::is_none")]
70 text: String,
71 words: Option<Vec<ElevenLabsWord>>,
73}
74
75impl ElevenLabs {
76 pub fn new(
89 api_key: String,
90 model_id: String,
91 base_url: String,
92 timeout_seconds: Option<u64>,
93 voice: Option<String>,
94 ) -> Self {
95 Self {
96 api_key,
97 model_id,
98 base_url,
99 timeout_seconds,
100 client: Client::new(),
101 voice,
102 }
103 }
104}
105
106#[async_trait]
107impl SpeechToTextProvider for ElevenLabs {
108 async fn transcribe(&self, audio: Vec<u8>) -> Result<String, LLMError> {
119 let url = format!("{}/speech-to-text", self.base_url);
120 let part = reqwest::multipart::Part::bytes(audio).file_name("audio.wav");
121 let form = reqwest::multipart::Form::new()
122 .text("model_id", self.model_id.clone())
123 .part("file", part);
124
125 let mut req = self
126 .client
127 .post(url)
128 .header("xi-api-key", &self.api_key)
129 .multipart(form);
130
131 if let Some(t) = self.timeout_seconds {
132 req = req.timeout(Duration::from_secs(t));
133 }
134
135 let resp = req.send().await?.error_for_status()?;
136 let text = resp.text().await?;
137 let raw = text.clone();
138 let parsed: ElevenLabsResponse =
139 serde_json::from_str(&text).map_err(|e| LLMError::ResponseFormatError {
140 message: e.to_string(),
141 raw_response: raw,
142 })?;
143
144 let words: Option<Vec<Word>> = parsed.words.map(|ws| {
145 ws.into_iter()
146 .map(|w| Word {
147 text: w.text,
148 start: w.start,
149 end: w.end,
150 })
151 .collect()
152 });
153
154 Ok(words
155 .unwrap_or_default()
156 .into_iter()
157 .map(|w| w.text)
158 .collect())
159 }
160
161 async fn transcribe_file(&self, file_path: &str) -> Result<String, LLMError> {
172 let url = format!("{}/speech-to-text", self.base_url);
173 let form = reqwest::multipart::Form::new()
174 .text("model_id", self.model_id.clone())
175 .file("file", file_path)
176 .await
177 .map_err(|e| LLMError::HttpError(e.to_string()))?;
178
179 let mut req = self
180 .client
181 .post(url)
182 .header("xi-api-key", &self.api_key)
183 .multipart(form);
184
185 if let Some(t) = self.timeout_seconds {
186 req = req.timeout(Duration::from_secs(t));
187 }
188
189 let resp = req.send().await?.error_for_status()?;
190 let text = resp.text().await?;
191 let raw = text.clone();
192 let parsed: ElevenLabsResponse =
193 serde_json::from_str(&text).map_err(|e| LLMError::ResponseFormatError {
194 message: e.to_string(),
195 raw_response: raw,
196 })?;
197
198 let words: Option<Vec<Word>> = parsed.words.map(|ws| {
199 ws.into_iter()
200 .map(|w| Word {
201 text: w.text,
202 start: w.start,
203 end: w.end,
204 })
205 .collect()
206 });
207
208 Ok(words
209 .unwrap_or_default()
210 .into_iter()
211 .map(|w| w.text)
212 .collect())
213 }
214}
215
216#[async_trait]
217impl CompletionProvider for ElevenLabs {
218 async fn complete(&self, _req: &CompletionRequest) -> Result<CompletionResponse, LLMError> {
220 Ok(CompletionResponse {
221 text: "ElevenLabs completion not implemented.".into(),
222 })
223 }
224}
225
226#[async_trait]
227impl EmbeddingProvider for ElevenLabs {
228 async fn embed(&self, _text: Vec<String>) -> Result<Vec<Vec<f32>>, LLMError> {
230 Err(LLMError::ProviderError(
231 "Embedding not supported".to_string(),
232 ))
233 }
234}
235
236#[async_trait]
237impl ChatProvider for ElevenLabs {
238 async fn chat(&self, _messages: &[ChatMessage]) -> Result<Box<dyn ChatResponse>, LLMError> {
240 Err(LLMError::ProviderError("Chat not supported".to_string()))
241 }
242
243 async fn chat_with_tools(
245 &self,
246 _messages: &[ChatMessage],
247 _tools: Option<&[Tool]>,
248 ) -> Result<Box<dyn ChatResponse>, LLMError> {
249 Err(LLMError::ProviderError(
250 "Chat with tools not supported".to_string(),
251 ))
252 }
253}
254
// Empty impl: model listing relies entirely on `ModelsProvider`'s default
// implementations (trait defined elsewhere in the crate).
#[async_trait]
impl ModelsProvider for ElevenLabs {}
257
impl LLMProvider for ElevenLabs {
    /// This provider exposes no tools.
    fn tools(&self) -> Option<&[Tool]> {
        None
    }
}
264
265#[async_trait]
266impl TextToSpeechProvider for ElevenLabs {
267 async fn speech(&self, text: &str) -> Result<Vec<u8>, LLMError> {
279 let url = format!(
280 "{}/text-to-speech/{}?output_format=mp3_44100_128",
281 self.base_url,
282 self.voice
283 .clone()
284 .unwrap_or("JBFqnCBsd6RMkjVDRZzb".to_string())
285 );
286
287 let body = serde_json::json!({
288 "text": text,
289 "model_id": self.model_id
290 });
291
292 let mut req = self
293 .client
294 .post(url)
295 .header("xi-api-key", &self.api_key)
296 .header("Content-Type", "application/json")
297 .json(&body);
298
299 if let Some(t) = self.timeout_seconds {
300 req = req.timeout(Duration::from_secs(t));
301 }
302
303 let resp = req.send().await?.error_for_status()?;
304 let audio_data = resp.bytes().await?;
305
306 Ok(audio_data.to_vec())
307 }
308}