llm_kit_elevenlabs/
lib.rs

//! ElevenLabs provider for the LLM Kit.
//!
//! This crate provides an ElevenLabs provider implementation for the LLM Kit,
//! supporting text-to-speech and speech-to-text models.
//!
//! # Features
//!
//! - **Text-to-Speech (TTS)**: Convert text to natural-sounding speech with multiple voices
//! - **Speech-to-Text (STT)**: Transcribe audio files to text with timestamps
//! - **Voice Settings**: Fine-tune stability, similarity, and style
//! - **Speaker Diarization**: Identify different speakers in transcriptions
//! - **Multiple Languages**: Support for 29+ languages
//!
//! # Quick Start
//!
//! ## Recommended: Using the Builder Pattern
//!
//! ```no_run
//! use llm_kit_elevenlabs::ElevenLabsClient;
//! use llm_kit_provider::Provider;
//! use llm_kit_provider::speech_model::{AudioData, call_options::SpeechModelCallOptions};
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! // Create provider using builder
//! let provider = ElevenLabsClient::new()
//!     .api_key("your-api-key")
//!     .build();
//!
//! let model = provider.speech_model("eleven_multilingual_v2")?;
//!
//! // Generate speech
//! let options = SpeechModelCallOptions {
//!     text: "Hello, how are you?".to_string(),
//!     voice: Some("21m00Tcm4TlvDq8ikWAM".to_string()),  // Rachel voice
//!     output_format: Some("mp3_44100_128".to_string()),
//!     speed: None,
//!     language: None,
//!     instructions: None,
//!     headers: None,
//!     provider_options: None,
//!     abort_signal: None,
//! };
//!
//! let result = model.do_generate(options).await?;
//!
//! // `result.audio` is an `AudioData` value: raw bytes or base64 text
//! match result.audio {
//!     AudioData::Binary(bytes) => println!("Generated {} bytes of audio", bytes.len()),
//!     AudioData::Base64(encoded) => println!("Generated {} base64 chars of audio", encoded.len()),
//! }
//! # Ok(())
//! # }
//! ```
//!
//! ## Alternative: Direct Instantiation
//!
//! ```no_run
//! use llm_kit_elevenlabs::{ElevenLabsProvider, ElevenLabsProviderSettings};
//! use llm_kit_provider::Provider;
//!
//! let provider = ElevenLabsProvider::new(
//!     ElevenLabsProviderSettings::new()
//!         .with_api_key("your-api-key")
//! );
//! ```
//!
//! # Examples
//!
//! ## Text-to-Speech
//!
//! ```no_run
//! use llm_kit_elevenlabs::ElevenLabsClient;
//! use llm_kit_provider::Provider;
//! use llm_kit_provider::speech_model::{AudioData, call_options::SpeechModelCallOptions};
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let provider = ElevenLabsClient::new()
//!     .api_key("your-api-key")
//!     .build();
//!
//! let model = provider.speech_model("eleven_multilingual_v2")?;
//!
//! let options = SpeechModelCallOptions {
//!     text: "Hello, world!".to_string(),
//!     voice: Some("21m00Tcm4TlvDq8ikWAM".to_string()),
//!     output_format: Some("mp3_44100_128".to_string()),
//!     speed: None,
//!     language: None,
//!     instructions: None,
//!     headers: None,
//!     provider_options: None,
//!     abort_signal: None,
//! };
//!
//! let result = model.do_generate(options).await?;
//!
//! // Extract audio bytes from the AudioData enum
//! let audio_bytes = match result.audio {
//!     AudioData::Binary(bytes) => bytes,
//!     AudioData::Base64(base64_str) => {
//!         use base64::{Engine as _, engine::general_purpose};
//!         general_purpose::STANDARD.decode(base64_str)?
//!     }
//! };
//!
//! std::fs::write("output.mp3", audio_bytes)?;
//! # Ok(())
//! # }
//! ```
//!
//! ## Speech-to-Text Transcription
//!
//! ```ignore
//! use llm_kit_elevenlabs::ElevenLabsClient;
//! use llm_kit_provider::{Provider, TranscriptionModel, TranscriptionInput, TranscriptionSettings};
//! use llm_kit_provider_utils::DataContent;
//! use std::fs;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let provider = ElevenLabsClient::new()
//!     .api_key("your-api-key")
//!     .build();
//!
//! let model = provider.transcription_model("scribe_v1")?;
//!
//! let audio_bytes = fs::read("audio.mp3")?;
//! let input = TranscriptionInput::Data(DataContent::from_bytes(audio_bytes, "audio/mpeg"));
//! let result = model.do_transcribe(input, TranscriptionSettings::default()).await?;
//!
//! println!("Transcription: {}", result.text);
//! # Ok(())
//! # }
//! ```
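//!
//! ## Speaker Diarization
//!
//! Diarization is listed among the features above, but the exact option names are not
//! shown in this crate-level documentation. The snippet below is a sketch only: it assumes
//! ElevenLabs-style `diarize` / `numSpeakers` keys passed as provider options, and a
//! `with_provider_options` helper on `TranscriptionSettings` analogous to the
//! `SpeechSettings` example further down. See `ElevenLabsTranscriptionProviderOptions`
//! in the `transcription` module for the fields that are actually supported.
//!
//! ```ignore
//! use llm_kit_elevenlabs::ElevenLabsClient;
//! use llm_kit_provider::{Provider, TranscriptionModel, TranscriptionInput, TranscriptionSettings};
//! use llm_kit_provider_utils::DataContent;
//! use serde_json::json;
//! use std::collections::HashMap;
//! use std::fs;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let provider = ElevenLabsClient::new()
//!     .api_key("your-api-key")
//!     .build();
//!
//! let model = provider.transcription_model("scribe_v1")?;
//!
//! // Assumed option keys, mirroring the ElevenLabs speech-to-text API.
//! let mut provider_options = HashMap::new();
//! provider_options.insert(
//!     "elevenlabs".to_string(),
//!     json!({ "diarize": true, "numSpeakers": 2 }).as_object().unwrap().clone(),
//! );
//!
//! let settings = TranscriptionSettings::default().with_provider_options(provider_options);
//!
//! let audio_bytes = fs::read("meeting.mp3")?;
//! let input = TranscriptionInput::Data(DataContent::from_bytes(audio_bytes, "audio/mpeg"));
//! let result = model.do_transcribe(input, settings).await?;
//!
//! println!("Transcription: {}", result.text);
//! # Ok(())
//! # }
//! ```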
//!
//! ## Custom Configuration
//!
//! ```no_run
//! use llm_kit_elevenlabs::ElevenLabsClient;
//!
//! let provider = ElevenLabsClient::new()
//!     .api_key("your-api-key")
//!     .base_url("https://custom-api.elevenlabs.io")
//!     .header("X-Custom-Header", "custom-value")
//!     .build();
//! ```
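//!
//! If you prefer not to hard-code the key, you can read it from an environment variable
//! yourself and pass it to the builder. This is only a sketch: the builder is not assumed
//! to read any environment variable on its own, and the `ELEVENLABS_API_KEY` name is
//! just a common convention.
//!
//! ```no_run
//! use llm_kit_elevenlabs::ElevenLabsClient;
//!
//! // Read the key at runtime instead of embedding it in source.
//! let api_key = std::env::var("ELEVENLABS_API_KEY")
//!     .expect("ELEVENLABS_API_KEY must be set");
//!
//! let provider = ElevenLabsClient::new()
//!     .api_key(api_key.as_str())
//!     .build();
//! ```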
//!
//! ## Provider-Specific Options
//!
//! ```ignore
//! use llm_kit_elevenlabs::ElevenLabsClient;
//! use llm_kit_provider::{Provider, SpeechModel, SpeechSettings};
//! use std::collections::HashMap;
//! use serde_json::json;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let provider = ElevenLabsClient::new()
//!     .api_key("your-api-key")
//!     .build();
//!
//! let model = provider.speech_model("eleven_multilingual_v2")?;
//!
//! // Provider options with voice settings
//! let mut provider_options = HashMap::new();
//! provider_options.insert(
//!     "elevenlabs".to_string(),
//!     json!({
//!         "voiceSettings": {
//!             "stability": 0.7,
//!             "similarityBoost": 0.8,
//!             "style": 0.5,
//!             "useSpeakerBoost": true
//!         },
//!         "seed": 12345
//!     })
//!     .as_object()
//!     .unwrap()
//!     .clone(),
//! );
//!
//! let settings = SpeechSettings::default().with_provider_options(provider_options);
//! let result = model.do_generate("Hello with custom voice settings!", settings).await?;
//! # Ok(())
//! # }
//! ```

/// Builder for creating an ElevenLabs provider.
pub mod client;

/// Configuration for ElevenLabs models.
pub mod config;

/// Error handling for the ElevenLabs API.
pub mod error;

/// Provider implementation.
pub mod provider;

/// Provider settings and configuration.
pub mod settings;

/// Speech (text-to-speech) model implementation.
pub mod speech;

/// Transcription (speech-to-text) model implementation.
pub mod transcription;

// Re-export main types
pub use client::ElevenLabsClient;
pub use provider::ElevenLabsProvider;
pub use settings::ElevenLabsProviderSettings;

// Re-export speech types
pub use speech::{
    ElevenLabsSpeechModel, ElevenLabsSpeechModelId, ElevenLabsSpeechProviderOptions,
    ElevenLabsSpeechVoiceId,
};

// Re-export transcription types
pub use transcription::{
    ElevenLabsTranscriptionModel, ElevenLabsTranscriptionModelId,
    ElevenLabsTranscriptionProviderOptions,
};