/// Configuration for a specific speaker in multi-speaker TTS
pub struct SpeakerVoiceConfig {
/// The name of the speaker (must match the name used in the prompt)
pub speaker: String,
/// Voice configuration for this speaker
pub voice_config: VoiceConfig,
}
Expand description
Configuration for a specific speaker in multi-speaker TTS
Fields§
§speaker: String
The name of the speaker (must match the name used in the prompt)
voice_config: VoiceConfig
Voice configuration for this speaker
Implementations§
Source§impl SpeakerVoiceConfig
impl SpeakerVoiceConfig
Source§pub fn new(speaker: impl Into<String>, voice_name: impl Into<String>) -> Self
pub fn new(speaker: impl Into<String>, voice_name: impl Into<String>) -> Self
Create a new speaker voice configuration
Examples found in repository?
examples/multi_speaker_tts.rs (line 20)
7async fn main() -> Result<(), Box<dyn std::error::Error>> {
8 // Load API key from environment variable
9 let api_key =
10 std::env::var("GEMINI_API_KEY").expect("Please set GEMINI_API_KEY environment variable");
11
12 // Create client with TTS-enabled model
13 let client = Gemini::with_model(api_key, "models/gemini-2.5-flash-preview-tts".to_string());
14
15 println!("🎭 Gemini Multi-Speaker Speech Generation Example");
16 println!("Generating multi-speaker audio from dialogue...\n");
17
18 // Create multi-speaker configuration
19 let speakers = vec![
20 SpeakerVoiceConfig::new("Alice", "Puck"),
21 SpeakerVoiceConfig::new("Bob", "Charon"),
22 ];
23
24 // Create generation config with multi-speaker speech settings
25 let generation_config = GenerationConfig {
26 response_modalities: Some(vec!["AUDIO".to_string()]),
27 speech_config: Some(SpeechConfig::multi_speaker(speakers)),
28 ..Default::default()
29 };
30
31 // Create a dialogue with speaker tags
32 let dialogue = r#"
33Alice: Hello there! I'm excited to demonstrate multi-speaker text-to-speech with Gemini.
34
35Bob: That's amazing! I can't believe how natural this sounds. The different voices really bring the conversation to life.
36
37Alice: Exactly! Each speaker has their own distinct voice characteristics, making it easy to follow who's speaking.
38
39Bob: This technology opens up so many possibilities for audio content creation, educational materials, and accessibility features.
40
41Alice: I couldn't agree more. It's remarkable how far AI-generated speech has come!
42"#;
43
44 match client
45 .generate_content()
46 .with_user_message(dialogue)
47 .with_generation_config(generation_config)
48 .execute()
49 .await
50 {
51 Ok(response) => {
52 println!("✅ Multi-speaker speech generation completed!");
53
54 // Check if we have candidates
55 for (i, candidate) in response.candidates.iter().enumerate() {
56 if let Some(parts) = &candidate.content.parts {
57 for (j, part) in parts.iter().enumerate() {
58 match part {
59 // Look for inline data with audio MIME type
60 Part::InlineData { inline_data } => {
61 if inline_data.mime_type.starts_with("audio/") {
62 println!("📄 Found audio data: {}", inline_data.mime_type);
63
64 // Decode base64 audio data
65 match general_purpose::STANDARD.decode(&inline_data.data) {
66 Ok(audio_bytes) => {
67 let filename =
68 format!("multi_speaker_dialogue_{}_{}.pcm", i, j);
69
70 // Save audio to file
71 match File::create(&filename) {
72 Ok(mut file) => {
73 if let Err(e) = file.write_all(&audio_bytes) {
74 eprintln!(
75 "❌ Error writing audio file: {}",
76 e
77 );
78 } else {
79 println!(
80 "💾 Multi-speaker audio saved as: {}",
81 filename
82 );
83 println!("🎧 Play with: aplay {} (Linux) or afplay {} (macOS)", filename, filename);
84 println!("👥 Features Alice (Puck voice) and Bob (Charon voice)");
85 }
86 }
87 Err(e) => {
88 eprintln!("❌ Error creating audio file: {}", e)
89 }
90 }
91 }
92 Err(e) => {
93 eprintln!("❌ Error decoding base64 audio: {}", e)
94 }
95 }
96 }
97 }
98 // Display any text content
99 Part::Text { text, thought } => {
100 if thought.unwrap_or(false) {
101 println!("💭 Model thought: {}", text);
102 } else {
103 println!("📝 Generated text: {}", text);
104 }
105 }
106 _ => {
107 // Handle other part types if needed
108 }
109 }
110 }
111 }
112 }
113
114 // Display usage metadata if available
115 if let Some(usage_metadata) = &response.usage_metadata {
116 println!("\n📊 Usage Statistics:");
117 println!(" Prompt tokens: {}", usage_metadata.prompt_token_count);
118 println!(" Total tokens: {}", usage_metadata.total_token_count);
119 if let Some(thoughts_tokens) = usage_metadata.thoughts_token_count {
120 println!(" Thinking tokens: {}", thoughts_tokens);
121 }
122 }
123 }
124 Err(e) => {
125 eprintln!("❌ Error generating multi-speaker speech: {}", e);
126 eprintln!("\n💡 Troubleshooting tips:");
127 eprintln!(" 1. Make sure GEMINI_API_KEY environment variable is set");
128 eprintln!(" 2. Verify you have access to the Gemini TTS model");
129 eprintln!(" 3. Check your internet connection");
130 eprintln!(" 4. Ensure speaker names in dialogue match configured speakers");
131 eprintln!(" 5. Make sure the model 'gemini-2.5-flash-preview-tts' supports multi-speaker TTS");
132 }
133 }
134
135 println!("\n🎉 Example completed!");
136 println!("💡 Tips for multi-speaker TTS:");
137 println!(" • Use clear speaker names (Alice:, Bob:, etc.)");
138 println!(" • Configure voice for each speaker beforehand");
139 println!(" • Available voices: Puck, Charon, Kore, Fenrir, Aoede");
140 println!(" • Each speaker maintains consistent voice characteristics");
141
142 Ok(())
143}
Trait Implementations§
Source§impl Clone for SpeakerVoiceConfig
impl Clone for SpeakerVoiceConfig
Source§fn clone(&self) -> SpeakerVoiceConfig
fn clone(&self) -> SpeakerVoiceConfig
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Source§impl Debug for SpeakerVoiceConfig
impl Debug for SpeakerVoiceConfig
Source§impl<'de> Deserialize<'de> for SpeakerVoiceConfig
impl<'de> Deserialize<'de> for SpeakerVoiceConfig
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Source§impl PartialEq for SpeakerVoiceConfig
impl PartialEq for SpeakerVoiceConfig
Source§impl Serialize for SpeakerVoiceConfig
impl Serialize for SpeakerVoiceConfig
impl StructuralPartialEq for SpeakerVoiceConfig
Auto Trait Implementations§
impl Freeze for SpeakerVoiceConfig
impl RefUnwindSafe for SpeakerVoiceConfig
impl Send for SpeakerVoiceConfig
impl Sync for SpeakerVoiceConfig
impl Unpin for SpeakerVoiceConfig
impl UnwindSafe for SpeakerVoiceConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more