Crate llm_kit_elevenlabs


ElevenLabs provider for the LLM Kit.

This crate provides an ElevenLabs provider implementation for the LLM Kit, supporting text-to-speech and speech-to-text models.

§Features

  • Text-to-Speech (TTS): Convert text to natural-sounding speech with multiple voices
  • Speech-to-Text (STT): Transcribe audio files to text with timestamps
  • Voice Settings: Fine-tune stability, similarity, and style
  • Speaker Diarization: Identify different speakers in transcriptions
  • Multiple Languages: Support for 29+ languages

§Quick Start

use llm_kit_elevenlabs::ElevenLabsClient;
use llm_kit_provider::Provider;
use llm_kit_provider::speech_model::call_options::SpeechModelCallOptions;

// Create provider using builder
let provider = ElevenLabsClient::new()
    .api_key("your-api-key")
    .build();

let model = provider.speech_model("eleven_multilingual_v2")?;

// Generate speech
let options = SpeechModelCallOptions {
    text: "Hello, how are you?".to_string(),
    voice: Some("21m00Tcm4TlvDq8ikWAM".to_string()),  // Rachel voice
    output_format: Some("mp3_44100_128".to_string()),
    speed: None,
    language: None,
    instructions: None,
    headers: None,
    provider_options: None,
    abort_signal: None,
};

let result = model.do_generate(options).await?;

println!("Generated {} bytes of audio", result.audio.len());

§Alternative: Direct Instantiation

use llm_kit_elevenlabs::{ElevenLabsProvider, ElevenLabsProviderSettings};
use llm_kit_provider::Provider;

let provider = ElevenLabsProvider::new(
    ElevenLabsProviderSettings::new()
        .with_api_key("your-api-key")
);
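
The directly constructed provider supports the same model lookups shown in the other examples. A short sketch continuing the snippet above, assuming the same Provider trait methods used throughout this page:

// Model lookup works the same as with the builder-constructed provider
let speech_model = provider.speech_model("eleven_multilingual_v2")?;
let transcription_model = provider.transcription_model("scribe_v1")?;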

§Examples

§Text-to-Speech

use llm_kit_elevenlabs::ElevenLabsClient;
use llm_kit_provider::Provider;
use llm_kit_provider::speech_model::{AudioData, call_options::SpeechModelCallOptions};

let provider = ElevenLabsClient::new()
    .api_key("your-api-key")
    .build();

let model = provider.speech_model("eleven_multilingual_v2")?;

let options = SpeechModelCallOptions {
    text: "Hello, world!".to_string(),
    voice: Some("21m00Tcm4TlvDq8ikWAM".to_string()),
    output_format: Some("mp3_44100_128".to_string()),
    speed: None,
    language: None,
    instructions: None,
    headers: None,
    provider_options: None,
    abort_signal: None,
};

let result = model.do_generate(options).await?;

// Extract audio bytes from the AudioData enum
let audio_bytes = match result.audio {
    AudioData::Binary(bytes) => bytes,
    AudioData::Base64(base64_str) => {
        use base64::{Engine as _, engine::general_purpose};
        general_purpose::STANDARD.decode(base64_str)?
    }
};

std::fs::write("output.mp3", audio_bytes)?;
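
Note that the Base64 branch relies on the base64 crate, so callers reproducing this example need it as a direct dependency alongside llm_kit_elevenlabs.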

§Speech-to-Text Transcription

use llm_kit_elevenlabs::ElevenLabsClient;
use llm_kit_provider::{Provider, TranscriptionModel, TranscriptionInput, TranscriptionSettings};
use llm_kit_provider_utils::DataContent;
use std::fs;

let provider = ElevenLabsClient::new()
    .api_key("your-api-key")
    .build();

let model = provider.transcription_model("scribe_v1")?;

let audio_bytes = fs::read("audio.mp3")?;
let input = TranscriptionInput::Data(DataContent::from_bytes(audio_bytes, "audio/mpeg"));
let result = model.do_transcribe(input, TranscriptionSettings::default()).await?;

println!("Transcription: {}", result.text);

§Custom Configuration

use llm_kit_elevenlabs::ElevenLabsClient;

let provider = ElevenLabsClient::new()
    .api_key("your-api-key")
    .base_url("https://custom-api.elevenlabs.io")
    .header("X-Custom-Header", "custom-value")
    .build();
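
To avoid hard-coding the key, it can be read from the environment before building the provider. A minimal sketch; the ELEVENLABS_API_KEY variable name is a convention used here, not something the crate reads automatically:

use llm_kit_elevenlabs::ElevenLabsClient;
use std::env;

// Variable name is illustrative; use whatever your deployment defines
let api_key = env::var("ELEVENLABS_API_KEY")?;

let provider = ElevenLabsClient::new()
    .api_key(api_key.as_str())
    .build();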

§Provider-Specific Options

use llm_kit_elevenlabs::ElevenLabsClient;
use llm_kit_provider::Provider;
use llm_kit_provider::speech_model::call_options::SpeechModelCallOptions;
use std::collections::HashMap;
use serde_json::json;

let provider = ElevenLabsClient::new()
    .api_key("your-api-key")
    .build();

let model = provider.speech_model("eleven_multilingual_v2")?;

// Provider options with ElevenLabs voice settings, keyed by provider name
let mut provider_options = HashMap::new();
provider_options.insert(
    "elevenlabs".to_string(),
    json!({
        "voiceSettings": {
            "stability": 0.7,
            "similarityBoost": 0.8,
            "style": 0.5,
            "useSpeakerBoost": true
        },
        "seed": 12345
    })
    .as_object()
    .unwrap()
    .clone(),
);

// Pass the options through the provider_options field of the call options
let options = SpeechModelCallOptions {
    text: "Hello with custom voice settings!".to_string(),
    voice: Some("21m00Tcm4TlvDq8ikWAM".to_string()),
    output_format: Some("mp3_44100_128".to_string()),
    speed: None,
    language: None,
    instructions: None,
    headers: None,
    provider_options: Some(provider_options),
    abort_signal: None,
};

let result = model.do_generate(options).await?;
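
Nesting the options under the "elevenlabs" key keeps provider-specific settings namespaced, so the same provider_options map could in principle carry entries for other providers as well.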

Re-exports§

pub use client::ElevenLabsClient;
pub use provider::ElevenLabsProvider;
pub use settings::ElevenLabsProviderSettings;
pub use speech::ElevenLabsSpeechModel;
pub use speech::ElevenLabsSpeechModelId;
pub use speech::ElevenLabsSpeechProviderOptions;
pub use speech::ElevenLabsSpeechVoiceId;
pub use transcription::ElevenLabsTranscriptionModel;
pub use transcription::ElevenLabsTranscriptionModelId;
pub use transcription::ElevenLabsTranscriptionProviderOptions;

Modules§

client
Builder for creating the ElevenLabs provider.
config
Configuration for ElevenLabs models.
error
Error handling for ElevenLabs API.
provider
Provider implementation.
settings
Provider settings and configuration.
speech
Speech (text-to-speech) model implementation.
transcription
Transcription (speech-to-text) model implementation.