SpeechConfig

Struct SpeechConfig 

Source
pub struct SpeechConfig {
    pub voice_config: Option<VoiceConfig>,
    pub multi_speaker_voice_config: Option<MultiSpeakerVoiceConfig>,
}
Expand description

Configuration for speech generation (text-to-speech). It holds an optional single-voice configuration and an optional multi-speaker configuration.

Fields§

§voice_config: Option<VoiceConfig>

Voice configuration for single-voice output

§multi_speaker_voice_config: Option<MultiSpeakerVoiceConfig>

Voice configuration for multi-speaker output

Implementations§

Source§

impl SpeechConfig

Source

pub fn single_voice(voice_name: impl Into<String>) -> Self

Creates a new speech config that uses a single voice, identified by voice_name

Source

pub fn multi_speaker(speakers: Vec<SpeakerVoiceConfig>) -> Self

Creates a new speech config for multiple speakers from the given list of SpeakerVoiceConfig values

Examples found in repository?
examples/multi_speaker_tts.rs (line 27)
7async fn main() -> Result<(), Box<dyn std::error::Error>> {
8    // Load API key from environment variable
9    let api_key =
10        std::env::var("GEMINI_API_KEY").expect("Please set GEMINI_API_KEY environment variable");
11
12    // Create client with TTS-enabled model
13    let client = Gemini::with_model(api_key, "models/gemini-2.5-flash-preview-tts".to_string());
14
15    println!("🎭 Gemini Multi-Speaker Speech Generation Example");
16    println!("Generating multi-speaker audio from dialogue...\n");
17
18    // Create multi-speaker configuration
19    let speakers = vec![
20        SpeakerVoiceConfig::new("Alice", "Puck"),
21        SpeakerVoiceConfig::new("Bob", "Charon"),
22    ];
23
24    // Create generation config with multi-speaker speech settings
25    let generation_config = GenerationConfig {
26        response_modalities: Some(vec!["AUDIO".to_string()]),
27        speech_config: Some(SpeechConfig::multi_speaker(speakers)),
28        ..Default::default()
29    };
30
31    // Create a dialogue with speaker tags
32    let dialogue = r#"
33Alice: Hello there! I'm excited to demonstrate multi-speaker text-to-speech with Gemini.
34
35Bob: That's amazing! I can't believe how natural this sounds. The different voices really bring the conversation to life.
36
37Alice: Exactly! Each speaker has their own distinct voice characteristics, making it easy to follow who's speaking.
38
39Bob: This technology opens up so many possibilities for audio content creation, educational materials, and accessibility features.
40
41Alice: I couldn't agree more. It's remarkable how far AI-generated speech has come!
42"#;
43
44    match client
45        .generate_content()
46        .with_user_message(dialogue)
47        .with_generation_config(generation_config)
48        .execute()
49        .await
50    {
51        Ok(response) => {
52            println!("✅ Multi-speaker speech generation completed!");
53
54            // Check if we have candidates
55            for (i, candidate) in response.candidates.iter().enumerate() {
56                if let Some(parts) = &candidate.content.parts {
57                    for (j, part) in parts.iter().enumerate() {
58                        match part {
59                            // Look for inline data with audio MIME type
60                            Part::InlineData { inline_data } => {
61                                if inline_data.mime_type.starts_with("audio/") {
62                                    println!("📄 Found audio data: {}", inline_data.mime_type);
63
64                                    // Decode base64 audio data
65                                    match general_purpose::STANDARD.decode(&inline_data.data) {
66                                        Ok(audio_bytes) => {
67                                            let filename =
68                                                format!("multi_speaker_dialogue_{}_{}.pcm", i, j);
69
70                                            // Save audio to file
71                                            match File::create(&filename) {
72                                                Ok(mut file) => {
73                                                    if let Err(e) = file.write_all(&audio_bytes) {
74                                                        eprintln!(
75                                                            "❌ Error writing audio file: {}",
76                                                            e
77                                                        );
78                                                    } else {
79                                                        println!(
80                                                            "💾 Multi-speaker audio saved as: {}",
81                                                            filename
82                                                        );
83                                                        println!("🎧 Play with: aplay {} (Linux) or afplay {} (macOS)", filename, filename);
84                                                        println!("👥 Features Alice (Puck voice) and Bob (Charon voice)");
85                                                    }
86                                                }
87                                                Err(e) => {
88                                                    eprintln!("❌ Error creating audio file: {}", e)
89                                                }
90                                            }
91                                        }
92                                        Err(e) => {
93                                            eprintln!("❌ Error decoding base64 audio: {}", e)
94                                        }
95                                    }
96                                }
97                            }
98                            // Display any text content
99                            Part::Text { text, thought } => {
100                                if thought.unwrap_or(false) {
101                                    println!("💭 Model thought: {}", text);
102                                } else {
103                                    println!("📝 Generated text: {}", text);
104                                }
105                            }
106                            _ => {
107                                // Handle other part types if needed
108                            }
109                        }
110                    }
111                }
112            }
113
114            // Display usage metadata if available
115            if let Some(usage_metadata) = &response.usage_metadata {
116                println!("\n📊 Usage Statistics:");
117                println!("   Prompt tokens: {}", usage_metadata.prompt_token_count);
118                println!("   Total tokens: {}", usage_metadata.total_token_count);
119                if let Some(thoughts_tokens) = usage_metadata.thoughts_token_count {
120                    println!("   Thinking tokens: {}", thoughts_tokens);
121                }
122            }
123        }
124        Err(e) => {
125            eprintln!("❌ Error generating multi-speaker speech: {}", e);
126            eprintln!("\n💡 Troubleshooting tips:");
127            eprintln!("   1. Make sure GEMINI_API_KEY environment variable is set");
128            eprintln!("   2. Verify you have access to the Gemini TTS model");
129            eprintln!("   3. Check your internet connection");
130            eprintln!("   4. Ensure speaker names in dialogue match configured speakers");
131            eprintln!("   5. Make sure the model 'gemini-2.5-flash-preview-tts' supports multi-speaker TTS");
132        }
133    }
134
135    println!("\n🎉 Example completed!");
136    println!("💡 Tips for multi-speaker TTS:");
137    println!("   • Use clear speaker names (Alice:, Bob:, etc.)");
138    println!("   • Configure voice for each speaker beforehand");
139    println!("   • Available voices: Puck, Charon, Kore, Fenrir, Aoede");
140    println!("   • Each speaker maintains consistent voice characteristics");
141
142    Ok(())
143}

Trait Implementations§

Source§

impl Clone for SpeechConfig

Source§

fn clone(&self) -> SpeechConfig

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for SpeechConfig

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<'de> Deserialize<'de> for SpeechConfig

Source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
Source§

impl PartialEq for SpeechConfig

Source§

fn eq(&self, other: &SpeechConfig) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl Serialize for SpeechConfig

Source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more
Source§

impl StructuralPartialEq for SpeechConfig

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

Source§

impl<T> ErasedDestructor for T
where T: 'static,