/// Configuration for speech generation (text-to-speech)
pub struct SpeechConfig {
/// Single voice configuration
pub voice_config: Option<VoiceConfig>,
/// Multi-speaker voice configuration
pub multi_speaker_voice_config: Option<MultiSpeakerVoiceConfig>,
}
Expand description
Configuration for speech generation (text-to-speech)
Fields§
§voice_config: Option<VoiceConfig>
Single voice configuration
multi_speaker_voice_config: Option<MultiSpeakerVoiceConfig>
Multi-speaker voice configuration
Implementations§
Source§impl SpeechConfig
impl SpeechConfig
Source§pub fn single_voice(voice_name: impl Into<String>) -> Self
pub fn single_voice(voice_name: impl Into<String>) -> Self
Create a new speech config with a single voice
Source§pub fn multi_speaker(speakers: Vec<SpeakerVoiceConfig>) -> Self
pub fn multi_speaker(speakers: Vec<SpeakerVoiceConfig>) -> Self
Create a new speech config with multiple speakers
Examples found in repository?
examples/multi_speaker_tts.rs (line 27)
7async fn main() -> Result<(), Box<dyn std::error::Error>> {
8 // Load API key from environment variable
9 let api_key =
10 std::env::var("GEMINI_API_KEY").expect("Please set GEMINI_API_KEY environment variable");
11
12 // Create client with TTS-enabled model
13 let client = Gemini::with_model(api_key, "models/gemini-2.5-flash-preview-tts".to_string());
14
15 println!("🎭 Gemini Multi-Speaker Speech Generation Example");
16 println!("Generating multi-speaker audio from dialogue...\n");
17
18 // Create multi-speaker configuration
19 let speakers = vec![
20 SpeakerVoiceConfig::new("Alice", "Puck"),
21 SpeakerVoiceConfig::new("Bob", "Charon"),
22 ];
23
24 // Create generation config with multi-speaker speech settings
25 let generation_config = GenerationConfig {
26 response_modalities: Some(vec!["AUDIO".to_string()]),
27 speech_config: Some(SpeechConfig::multi_speaker(speakers)),
28 ..Default::default()
29 };
30
31 // Create a dialogue with speaker tags
32 let dialogue = r#"
33Alice: Hello there! I'm excited to demonstrate multi-speaker text-to-speech with Gemini.
34
35Bob: That's amazing! I can't believe how natural this sounds. The different voices really bring the conversation to life.
36
37Alice: Exactly! Each speaker has their own distinct voice characteristics, making it easy to follow who's speaking.
38
39Bob: This technology opens up so many possibilities for audio content creation, educational materials, and accessibility features.
40
41Alice: I couldn't agree more. It's remarkable how far AI-generated speech has come!
42"#;
43
44 match client
45 .generate_content()
46 .with_user_message(dialogue)
47 .with_generation_config(generation_config)
48 .execute()
49 .await
50 {
51 Ok(response) => {
52 println!("✅ Multi-speaker speech generation completed!");
53
54 // Check if we have candidates
55 for (i, candidate) in response.candidates.iter().enumerate() {
56 if let Some(parts) = &candidate.content.parts {
57 for (j, part) in parts.iter().enumerate() {
58 match part {
59 // Look for inline data with audio MIME type
60 Part::InlineData { inline_data } => {
61 if inline_data.mime_type.starts_with("audio/") {
62 println!("📄 Found audio data: {}", inline_data.mime_type);
63
64 // Decode base64 audio data
65 match general_purpose::STANDARD.decode(&inline_data.data) {
66 Ok(audio_bytes) => {
67 let filename =
68 format!("multi_speaker_dialogue_{}_{}.pcm", i, j);
69
70 // Save audio to file
71 match File::create(&filename) {
72 Ok(mut file) => {
73 if let Err(e) = file.write_all(&audio_bytes) {
74 eprintln!(
75 "❌ Error writing audio file: {}",
76 e
77 );
78 } else {
79 println!(
80 "💾 Multi-speaker audio saved as: {}",
81 filename
82 );
83 println!("🎧 Play with: aplay {} (Linux) or afplay {} (macOS)", filename, filename);
84 println!("👥 Features Alice (Puck voice) and Bob (Charon voice)");
85 }
86 }
87 Err(e) => {
88 eprintln!("❌ Error creating audio file: {}", e)
89 }
90 }
91 }
92 Err(e) => {
93 eprintln!("❌ Error decoding base64 audio: {}", e)
94 }
95 }
96 }
97 }
98 // Display any text content
99 Part::Text { text, thought } => {
100 if thought.unwrap_or(false) {
101 println!("💭 Model thought: {}", text);
102 } else {
103 println!("📝 Generated text: {}", text);
104 }
105 }
106 _ => {
107 // Handle other part types if needed
108 }
109 }
110 }
111 }
112 }
113
114 // Display usage metadata if available
115 if let Some(usage_metadata) = &response.usage_metadata {
116 println!("\n📊 Usage Statistics:");
117 println!(" Prompt tokens: {}", usage_metadata.prompt_token_count);
118 println!(" Total tokens: {}", usage_metadata.total_token_count);
119 if let Some(thoughts_tokens) = usage_metadata.thoughts_token_count {
120 println!(" Thinking tokens: {}", thoughts_tokens);
121 }
122 }
123 }
124 Err(e) => {
125 eprintln!("❌ Error generating multi-speaker speech: {}", e);
126 eprintln!("\n💡 Troubleshooting tips:");
127 eprintln!(" 1. Make sure GEMINI_API_KEY environment variable is set");
128 eprintln!(" 2. Verify you have access to the Gemini TTS model");
129 eprintln!(" 3. Check your internet connection");
130 eprintln!(" 4. Ensure speaker names in dialogue match configured speakers");
131 eprintln!(" 5. Make sure the model 'gemini-2.5-flash-preview-tts' supports multi-speaker TTS");
132 }
133 }
134
135 println!("\n🎉 Example completed!");
136 println!("💡 Tips for multi-speaker TTS:");
137 println!(" • Use clear speaker names (Alice:, Bob:, etc.)");
138 println!(" • Configure voice for each speaker beforehand");
139 println!(" • Available voices: Puck, Charon, Kore, Fenrir, Aoede");
140 println!(" • Each speaker maintains consistent voice characteristics");
141
142 Ok(())
143}
Trait Implementations§
Source§impl Clone for SpeechConfig
impl Clone for SpeechConfig
Source§fn clone(&self) -> SpeechConfig
fn clone(&self) -> SpeechConfig
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Source§impl Debug for SpeechConfig
impl Debug for SpeechConfig
Source§impl<'de> Deserialize<'de> for SpeechConfig
impl<'de> Deserialize<'de> for SpeechConfig
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Source§impl PartialEq for SpeechConfig
impl PartialEq for SpeechConfig
Source§impl Serialize for SpeechConfig
impl Serialize for SpeechConfig
impl StructuralPartialEq for SpeechConfig
Auto Trait Implementations§
impl Freeze for SpeechConfig
impl RefUnwindSafe for SpeechConfig
impl Send for SpeechConfig
impl Sync for SpeechConfig
impl Unpin for SpeechConfig
impl UnwindSafe for SpeechConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more