// llm_kit_elevenlabs/lib.rs
1//! ElevenLabs provider for the LLM Kit.
2//!
3//! This crate provides an ElevenLabs provider implementation for the LLM Kit,
4//! supporting text-to-speech and speech-to-text models.
5//!
6//! # Features
7//!
8//! - **Text-to-Speech (TTS)**: Convert text to natural-sounding speech with multiple voices
9//! - **Speech-to-Text (STT)**: Transcribe audio files to text with timestamps
10//! - **Voice Settings**: Fine-tune stability, similarity, and style
11//! - **Speaker Diarization**: Identify different speakers in transcriptions
12//! - **Multiple Languages**: Support for 29+ languages
13//!
14//! # Quick Start
15//!
16//! ## Recommended: Using the Builder Pattern
17//!
18//! ```no_run
19//! use llm_kit_elevenlabs::ElevenLabsClient;
20//! use llm_kit_provider::Provider;
21//! use llm_kit_provider::speech_model::call_options::SpeechModelCallOptions;
22//!
23//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
24//! // Create provider using builder
25//! let provider = ElevenLabsClient::new()
26//! .api_key("your-api-key")
27//! .build();
28//!
29//! let model = provider.speech_model("eleven_multilingual_v2")?;
30//!
31//! // Generate speech
32//! let options = SpeechModelCallOptions {
33//! text: "Hello, how are you?".to_string(),
34//! voice: Some("21m00Tcm4TlvDq8ikWAM".to_string()), // Rachel voice
35//! output_format: Some("mp3_44100_128".to_string()),
36//! speed: None,
37//! language: None,
38//! instructions: None,
39//! headers: None,
40//! provider_options: None,
41//! abort_signal: None,
42//! };
43//!
44//! let result = model.do_generate(options).await?;
45//!
46//! println!("Generated {} bytes of audio", result.audio.len());
47//! # Ok(())
48//! # }
49//! ```
50//!
51//! ## Alternative: Direct Instantiation
52//!
53//! ```no_run
54//! use llm_kit_elevenlabs::{ElevenLabsProvider, ElevenLabsProviderSettings};
55//! use llm_kit_provider::Provider;
56//!
57//! let provider = ElevenLabsProvider::new(
58//! ElevenLabsProviderSettings::new()
59//! .with_api_key("your-api-key")
60//! );
61//! ```
62//!
63//! # Examples
64//!
65//! ## Text-to-Speech
66//!
67//! ```no_run
68//! use llm_kit_elevenlabs::ElevenLabsClient;
69//! use llm_kit_provider::Provider;
70//! use llm_kit_provider::speech_model::{AudioData, call_options::SpeechModelCallOptions};
71//!
72//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
73//! let provider = ElevenLabsClient::new()
74//! .api_key("your-api-key")
75//! .build();
76//!
77//! let model = provider.speech_model("eleven_multilingual_v2")?;
78//!
79//! let options = SpeechModelCallOptions {
80//! text: "Hello, world!".to_string(),
81//! voice: Some("21m00Tcm4TlvDq8ikWAM".to_string()),
82//! output_format: Some("mp3_44100_128".to_string()),
83//! speed: None,
84//! language: None,
85//! instructions: None,
86//! headers: None,
87//! provider_options: None,
88//! abort_signal: None,
89//! };
90//!
91//! let result = model.do_generate(options).await?;
92//!
93//! // Extract audio bytes from the AudioData enum
94//! let audio_bytes = match result.audio {
95//! AudioData::Binary(bytes) => bytes,
96//! AudioData::Base64(base64_str) => {
97//! use base64::{Engine as _, engine::general_purpose};
98//! general_purpose::STANDARD.decode(base64_str)?
99//! }
100//! };
101//!
102//! std::fs::write("output.mp3", audio_bytes)?;
103//! # Ok(())
104//! # }
105//! ```
106//!
107//! ## Speech-to-Text Transcription
108//!
109//! ```ignore
110//! use llm_kit_elevenlabs::ElevenLabsClient;
111//! use llm_kit_provider::{Provider, TranscriptionModel, TranscriptionInput, TranscriptionSettings};
112//! use llm_kit_provider_utils::DataContent;
113//! use std::fs;
114//!
115//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
116//! let provider = ElevenLabsClient::new()
117//! .api_key("your-api-key")
118//! .build();
119//!
120//! let model = provider.transcription_model("scribe_v1")?;
121//!
122//! let audio_bytes = fs::read("audio.mp3")?;
123//! let input = TranscriptionInput::Data(DataContent::from_bytes(audio_bytes, "audio/mpeg"));
124//! let result = model.do_transcribe(input, TranscriptionSettings::default()).await?;
125//!
126//! println!("Transcription: {}", result.text);
127//! # Ok(())
128//! # }
129//! ```
130//!
131//! ## Custom Configuration
132//!
133//! ```no_run
134//! use llm_kit_elevenlabs::ElevenLabsClient;
135//!
136//! let provider = ElevenLabsClient::new()
137//! .api_key("your-api-key")
138//! .base_url("https://custom-api.elevenlabs.io")
139//! .header("X-Custom-Header", "custom-value")
140//! .build();
141//! ```
142//!
143//! ## Provider-Specific Options
144//!
//! ```ignore
//! use llm_kit_elevenlabs::ElevenLabsClient;
//! use llm_kit_provider::Provider;
//! use llm_kit_provider::speech_model::call_options::SpeechModelCallOptions;
//! use std::collections::HashMap;
//! use serde_json::json;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let provider = ElevenLabsClient::new()
//!     .api_key("your-api-key")
//!     .build();
//!
//! let model = provider.speech_model("eleven_multilingual_v2")?;
//!
//! // Provider-specific options (voice settings and seed) are passed through
//! // `provider_options`, keyed by the provider name.
//! let mut provider_options = HashMap::new();
//! provider_options.insert(
//!     "elevenlabs".to_string(),
//!     json!({
//!         "voiceSettings": {
//!             "stability": 0.7,
//!             "similarityBoost": 0.8,
//!             "style": 0.5,
//!             "useSpeakerBoost": true
//!         },
//!         "seed": 12345
//!     })
//!     .as_object()
//!     .unwrap()
//!     .clone(),
//! );
//!
//! let options = SpeechModelCallOptions {
//!     text: "Hello with custom voice settings!".to_string(),
//!     voice: Some("21m00Tcm4TlvDq8ikWAM".to_string()),
//!     output_format: None,
//!     speed: None,
//!     language: None,
//!     instructions: None,
//!     headers: None,
//!     provider_options: Some(provider_options),
//!     abort_signal: None,
//! };
//!
//! let result = model.do_generate(options).await?;
//! # Ok(())
//! # }
//! ```
181
/// Builder for creating the ElevenLabs provider.
///
/// Home of [`ElevenLabsClient`], re-exported at the crate root.
pub mod client;

/// Configuration for ElevenLabs models.
pub mod config;

/// Error handling for ElevenLabs API.
pub mod error;

/// Provider implementation.
///
/// Home of [`ElevenLabsProvider`], re-exported at the crate root.
pub mod provider;

/// Provider settings and configuration.
///
/// Home of [`ElevenLabsProviderSettings`], re-exported at the crate root.
pub mod settings;

/// Speech (text-to-speech) model implementation.
///
/// Its public types (e.g. [`ElevenLabsSpeechModel`]) are re-exported at the crate root.
pub mod speech;

/// Transcription (speech-to-text) model implementation.
///
/// Its public types (e.g. [`ElevenLabsTranscriptionModel`]) are re-exported at the crate root.
pub mod transcription;
202
203// Re-export main types
204pub use client::ElevenLabsClient;
205pub use provider::ElevenLabsProvider;
206pub use settings::ElevenLabsProviderSettings;
207
208// Re-export speech types
209pub use speech::{
210 ElevenLabsSpeechModel, ElevenLabsSpeechModelId, ElevenLabsSpeechProviderOptions,
211 ElevenLabsSpeechVoiceId,
212};
213
214// Re-export transcription types
215pub use transcription::{
216 ElevenLabsTranscriptionModel, ElevenLabsTranscriptionModelId,
217 ElevenLabsTranscriptionProviderOptions,
218};