// gemini_rust/generation/model.rs

1use reqwest::Url;
2use serde::{Deserialize, Serialize};
3use time::OffsetDateTime;
4
5use crate::{
6    safety::{SafetyRating, SafetySetting},
7    Content, Modality, Part,
8};
9
10/// Reason why generation finished
11#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
12#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
13pub enum FinishReason {
14    /// Default value. This value is unused.
15    FinishReasonUnspecified,
16    /// Natural stop point of the model or provided stop sequence.
17    Stop,
18    /// The maximum number of tokens as specified in the request was reached.
19    MaxTokens,
20    /// The response candidate content was flagged for safety reasons.
21    Safety,
22    /// The response candidate content was flagged for recitation reasons.
23    Recitation,
24    /// The response candidate content was flagged for using an unsupported language.
25    Language,
26    /// Unknown reason.
27    Other,
28    /// Token generation stopped because the content contains forbidden terms.
29    Blocklist,
30    /// Token generation stopped for potentially containing prohibited content.
31    ProhibitedContent,
32    /// Token generation stopped because the content potentially contains Sensitive Personally Identifiable Information (SPII).
33    Spii,
34    /// The function call generated by the model is invalid.
35    MalformedFunctionCall,
36    /// Token generation stopped because generated images contain safety violations.
37    ImageSafety,
38    /// Model generated a tool call but no tools were enabled in the request.
39    UnexpectedToolCall,
40    /// Model called too many tools consecutively, thus the system exited execution.
41    TooManyToolCalls,
42}
43
44/// Citation metadata for content
45#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
46#[serde(rename_all = "camelCase")]
47pub struct CitationMetadata {
48    /// The citation sources
49    pub citation_sources: Vec<CitationSource>,
50}
51
52/// Citation source
53#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
54#[serde(rename_all = "camelCase")]
55pub struct CitationSource {
56    /// The URI of the citation source
57    pub uri: Option<String>,
58    /// The title of the citation source
59    pub title: Option<String>,
60    /// The start index of the citation in the response
61    pub start_index: Option<i32>,
62    /// The end index of the citation in the response
63    pub end_index: Option<i32>,
64    /// The license of the citation source
65    pub license: Option<String>,
66    /// The publication date of the citation source
67    #[serde(default, with = "time::serde::rfc3339::option")]
68    pub publication_date: Option<OffsetDateTime>,
69}
70
/// A candidate response
///
/// One alternative completion produced by the model. A response may carry
/// several candidates when `GenerationConfig::candidate_count` > 1.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct Candidate {
    /// The content of the candidate
    ///
    /// `#[serde(default)]` because the API can return a candidate with no
    /// `content` key (e.g. when generation was blocked); falls back to
    /// `Content::default()`.
    #[serde(default)]
    pub content: Content,
    /// The safety ratings for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_ratings: Option<Vec<SafetyRating>>,
    /// The citation metadata for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub citation_metadata: Option<CitationMetadata>,
    /// The grounding metadata for the candidate (present when grounding tools were used)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grounding_metadata: Option<GroundingMetadata>,
    /// The finish reason for the candidate (absent while streaming is still in progress)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<FinishReason>,
    /// The index of the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index: Option<i32>,
}
94
/// Metadata about token usage
///
/// All counts are optional because the API omits them when the corresponding
/// stage failed or does not apply.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct UsageMetadata {
    /// The number of prompt tokens (null if request processing failed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_token_count: Option<i32>,
    /// The number of response tokens (null if generation failed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub candidates_token_count: Option<i32>,
    /// The total number of tokens (null if individual counts unavailable)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_token_count: Option<i32>,
    /// The number of thinking tokens (Gemini 2.5 series only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<i32>,
    /// Detailed prompt token information, broken down per modality
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_tokens_details: Option<Vec<PromptTokenDetails>>,
    /// The number of cached content tokens (batch API)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content_token_count: Option<i32>,
    /// Detailed cache token information (batch API), per modality
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_tokens_details: Option<Vec<PromptTokenDetails>>,
}
121
/// Details about prompt tokens by modality
///
/// One entry per modality present in the prompt (or cache), pairing the
/// modality with how many tokens it consumed.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptTokenDetails {
    /// The modality (e.g., "TEXT")
    pub modality: Modality,
    /// Token count for this modality
    pub token_count: i32,
}
131
/// Grounding metadata for responses that use grounding tools
///
/// Present on a [`Candidate`] when the request enabled a grounding tool
/// (e.g. Google Search or Google Maps grounding).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingMetadata {
    /// Grounding chunks containing source information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grounding_chunks: Option<Vec<GroundingChunk>>,
    /// Grounding supports connecting spans of the response text to entries in
    /// `grounding_chunks`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grounding_supports: Option<Vec<GroundingSupport>>,
    /// Web search queries used for grounding
    #[serde(skip_serializing_if = "Option::is_none")]
    pub web_search_queries: Option<Vec<String>>,
    /// Google Maps widget context token
    #[serde(skip_serializing_if = "Option::is_none")]
    pub google_maps_widget_context_token: Option<String>,
}
149
/// A chunk of grounding information from a source
///
/// Acts as a union over source kinds: presumably exactly one of `maps` or
/// `web` is populated per chunk — TODO confirm against API responses.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingChunk {
    /// Maps-specific grounding information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub maps: Option<MapsGroundingChunk>,
    /// Web-specific grounding information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub web: Option<WebGroundingChunk>,
}
161
/// Maps-specific grounding chunk information
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MapsGroundingChunk {
    /// The URI of the Maps source
    // NOTE(review): `uri` and `title` are modeled as required; verify the API
    // never omits them, otherwise deserialization of the chunk fails.
    pub uri: Url,
    /// The title of the Maps source
    pub title: String,
    /// The place ID from Google Maps
    #[serde(skip_serializing_if = "Option::is_none")]
    pub place_id: Option<String>,
}
174
/// Web-specific grounding chunk information
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct WebGroundingChunk {
    /// The URI of the web source
    // NOTE(review): modeled as required; verify the API always sends both
    // `uri` and `title` for web chunks.
    pub uri: Url,
    /// The title of the web source
    pub title: String,
}
184
185/// Support information connecting response text to grounding sources
186#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
187#[serde(rename_all = "camelCase")]
188pub struct GroundingSupport {
189    /// Segment of the response text
190    pub segment: GroundingSegment,
191    /// Indices of grounding chunks that support this segment
192    pub grounding_chunk_indices: Vec<u32>,
193}
194
195/// A segment of response text
196#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
197#[serde(rename_all = "camelCase")]
198pub struct GroundingSegment {
199    /// Start index of the segment in the response text
200    pub start_index: u32,
201    /// End index of the segment in the response text
202    pub end_index: u32,
203    /// The text content of the segment
204    pub text: String,
205}
206
/// Response from the Gemini API for content generation
///
/// Top-level payload returned by `generateContent` (and each chunk of
/// `streamGenerateContent`). Convenience accessors live in the inherent
/// `impl` below.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GenerationResponse {
    /// The candidates generated
    ///
    /// Defaults to empty — the key is absent when the prompt was blocked.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub candidates: Vec<Candidate>,
    /// The prompt feedback (safety ratings and, if blocked, the block reason)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_feedback: Option<PromptFeedback>,
    /// Usage metadata
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage_metadata: Option<UsageMetadata>,
    /// Model version used
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_version: Option<String>,
    /// Response ID
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_id: Option<String>,
}
227
228/// Reason why content was blocked
229#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
230#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
231pub enum BlockReason {
232    /// Default value. This value is unused.
233    BlockReasonUnspecified,
234    /// Prompt was blocked due to safety reasons. Inspect safetyRatings to understand which safety category blocked it.
235    Safety,
236    /// Prompt was blocked due to unknown reasons.
237    Other,
238    /// Prompt was blocked due to the terms which are included from the terminology blocklist.
239    Blocklist,
240    /// Prompt was blocked due to prohibited content.
241    ProhibitedContent,
242    /// Candidates blocked due to unsafe image generation content.
243    ImageSafety,
244}
245
/// Feedback about the prompt
///
/// Safety evaluation of the *input* prompt (as opposed to the per-candidate
/// ratings on [`Candidate`]).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptFeedback {
    /// The safety ratings for the prompt (defaults to empty when omitted)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub safety_ratings: Vec<SafetyRating>,
    /// The block reason if the prompt was blocked; `None` means not blocked
    #[serde(skip_serializing_if = "Option::is_none")]
    pub block_reason: Option<BlockReason>,
}
257
258impl GenerationResponse {
259    /// Get the text of the first candidate
260    pub fn text(&self) -> String {
261        self.candidates
262            .first()
263            .and_then(|c| {
264                c.content.parts.as_ref().and_then(|parts| {
265                    parts.first().and_then(|p| match p {
266                        Part::Text {
267                            text,
268                            thought: _,
269                            thought_signature: _,
270                        } => Some(text.clone()),
271                        _ => None,
272                    })
273                })
274            })
275            .unwrap_or_default()
276    }
277
278    /// Get function calls from the response
279    pub fn function_calls(&self) -> Vec<&crate::tools::FunctionCall> {
280        self.candidates
281            .iter()
282            .flat_map(|c| {
283                c.content
284                    .parts
285                    .as_ref()
286                    .map(|parts| {
287                        parts
288                            .iter()
289                            .filter_map(|p| match p {
290                                Part::FunctionCall {
291                                    function_call,
292                                    thought_signature: _,
293                                } => Some(function_call),
294                                _ => None,
295                            })
296                            .collect::<Vec<_>>()
297                    })
298                    .unwrap_or_default()
299            })
300            .collect()
301    }
302
303    /// Get function calls with their thought signatures from the response
304    pub fn function_calls_with_thoughts(
305        &self,
306    ) -> Vec<(&crate::tools::FunctionCall, Option<&String>)> {
307        self.candidates
308            .iter()
309            .flat_map(|c| {
310                c.content
311                    .parts
312                    .as_ref()
313                    .map(|parts| {
314                        parts
315                            .iter()
316                            .filter_map(|p| match p {
317                                Part::FunctionCall {
318                                    function_call,
319                                    thought_signature,
320                                } => Some((function_call, thought_signature.as_ref())),
321                                _ => None,
322                            })
323                            .collect::<Vec<_>>()
324                    })
325                    .unwrap_or_default()
326            })
327            .collect()
328    }
329
330    /// Get thought summaries from the response
331    pub fn thoughts(&self) -> Vec<String> {
332        self.candidates
333            .iter()
334            .flat_map(|c| {
335                c.content
336                    .parts
337                    .as_ref()
338                    .map(|parts| {
339                        parts
340                            .iter()
341                            .filter_map(|p| match p {
342                                Part::Text {
343                                    text,
344                                    thought: Some(true),
345                                    thought_signature: _,
346                                } => Some(text.clone()),
347                                _ => None,
348                            })
349                            .collect::<Vec<_>>()
350                    })
351                    .unwrap_or_default()
352            })
353            .collect()
354    }
355
356    /// Get all text parts (both regular text and thoughts)
357    pub fn all_text(&self) -> Vec<(String, bool)> {
358        self.candidates
359            .iter()
360            .flat_map(|c| {
361                c.content
362                    .parts
363                    .as_ref()
364                    .map(|parts| {
365                        parts
366                            .iter()
367                            .filter_map(|p| match p {
368                                Part::Text {
369                                    text,
370                                    thought,
371                                    thought_signature: _,
372                                } => Some((text.clone(), thought.unwrap_or(false))),
373                                _ => None,
374                            })
375                            .collect::<Vec<_>>()
376                    })
377                    .unwrap_or_default()
378            })
379            .collect()
380    }
381
382    /// Get text parts with their thought signatures from the response
383    pub fn text_with_thoughts(&self) -> Vec<(String, bool, Option<&String>)> {
384        self.candidates
385            .iter()
386            .flat_map(|c| {
387                c.content
388                    .parts
389                    .as_ref()
390                    .map(|parts| {
391                        parts
392                            .iter()
393                            .filter_map(|p| match p {
394                                Part::Text {
395                                    text,
396                                    thought,
397                                    thought_signature,
398                                } => Some((
399                                    text.clone(),
400                                    thought.unwrap_or(false),
401                                    thought_signature.as_ref(),
402                                )),
403                                _ => None,
404                            })
405                            .collect::<Vec<_>>()
406                    })
407                    .unwrap_or_default()
408            })
409            .collect()
410    }
411}
412
/// Request to generate content
///
/// Body of a `generateContent` / `streamGenerateContent` call. Every
/// optional field is omitted from the JSON when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerateContentRequest {
    /// The contents (conversation turns) to generate content from
    pub contents: Vec<Content>,
    /// The generation config
    #[serde(skip_serializing_if = "Option::is_none")]
    pub generation_config: Option<GenerationConfig>,
    /// The safety settings
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_settings: Option<Vec<SafetySetting>>,
    /// The tools that the model can use
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<crate::tools::Tool>>,
    /// The tool config
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_config: Option<crate::tools::ToolConfig>,
    /// The system instruction
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_instruction: Option<Content>,
    /// The cached content to use, referenced by resource name
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content: Option<String>,
}
438
/// Thinking level for Gemini 3 Pro models
///
/// Controls the depth of reasoning and analysis the model applies.
/// Serialized as `SCREAMING_SNAKE_CASE` (e.g. `LOW`, `HIGH`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum ThinkingLevel {
    /// Unspecified thinking level (uses model default)
    ThinkingLevelUnspecified,
    /// Low thinking level - faster responses with less reasoning
    Low,
    /// High thinking level - deeper analysis with more comprehensive reasoning
    High,
}
452
/// Media resolution level for images and PDFs
///
/// Controls the resolution used when processing inline images and PDF documents,
/// which affects both quality and token consumption. Serialized as
/// `SCREAMING_SNAKE_CASE` (e.g. `MEDIA_RESOLUTION_LOW`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum MediaResolutionLevel {
    /// Unspecified resolution (uses model default)
    MediaResolutionUnspecified,
    /// Low resolution - uses fewer tokens, lower quality
    MediaResolutionLow,
    /// Medium resolution - balanced token usage and quality
    MediaResolutionMedium,
    /// High resolution - uses more tokens, higher quality
    MediaResolutionHigh,
}
469
/// Wrapper struct for per-part media resolution.
/// Allows fine-grained control over the resolution used for individual inline images and PDFs.
// No `rename_all` needed: the single field `level` is identical in
// snake_case and camelCase.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct MediaResolution {
    /// The media resolution level to use
    pub level: MediaResolutionLevel,
}
477
/// Configuration for thinking (Gemini 2.5 and Gemini 3 series)
///
/// - For Gemini 2.5 models, use `thinking_budget` and `include_thoughts`.
/// - For Gemini 3 models, use `thinking_level` (mutually exclusive with `thinking_budget`).
///
/// Note: the mutual exclusivity is not enforced by this type; the API
/// rejects requests that set both.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ThinkingConfig {
    /// The thinking budget (number of thinking tokens)
    ///
    /// - Set to 0 to disable thinking
    /// - Set to -1 for dynamic thinking (model decides)
    /// - Set to a positive number for a specific token budget
    ///
    /// Model-specific ranges:
    /// - 2.5 Pro: 128 to 32768 (cannot disable thinking)
    /// - 2.5 Flash: 0 to 24576
    /// - 2.5 Flash Lite: 512 to 24576
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,

    /// Whether to include thought summaries in the response
    ///
    /// When enabled, the response will include synthesized versions of the model's
    /// raw thoughts, providing insights into the reasoning process.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level (Required for Gemini 3)
    ///
    /// Gemini 3 uses thinking_level (Low/High) which is mutually exclusive with thinking_budget
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,
}
511
512impl ThinkingConfig {
513    /// Create a new thinking config with default settings
514    pub fn new() -> Self {
515        Self {
516            thinking_budget: None,
517            include_thoughts: None,
518            thinking_level: None,
519        }
520    }
521
522    /// Set the thinking budget
523    pub fn with_thinking_budget(mut self, budget: i32) -> Self {
524        self.thinking_budget = Some(budget);
525        self
526    }
527
528    /// Enable dynamic thinking (model decides the budget)
529    pub fn with_dynamic_thinking(mut self) -> Self {
530        self.thinking_budget = Some(-1);
531        self
532    }
533
534    /// Include thought summaries in the response
535    pub fn with_thoughts_included(mut self, include: bool) -> Self {
536        self.include_thoughts = Some(include);
537        self
538    }
539
540    /// Set the thinking level (Required for Gemini 3)
541    pub fn with_thinking_level(mut self, level: ThinkingLevel) -> Self {
542        self.thinking_level = Some(level);
543        self
544    }
545
546    /// Create a thinking config that enables dynamic thinking with thoughts included
547    pub fn dynamic_thinking() -> Self {
548        Self {
549            thinking_budget: Some(-1),
550            include_thoughts: Some(true),
551            thinking_level: None,
552        }
553    }
554}
555
556impl Default for ThinkingConfig {
557    fn default() -> Self {
558        Self::new()
559    }
560}
561
562/// Configuration for generation
563#[derive(Debug, Default, Clone, Serialize, Deserialize)]
564#[serde(rename_all = "camelCase")]
565pub struct GenerationConfig {
566    /// The temperature for the model (0.0 to 1.0)
567    ///
568    /// Controls the randomness of the output. Higher values (e.g., 0.9) make output
569    /// more random, lower values (e.g., 0.1) make output more deterministic.
570    #[serde(skip_serializing_if = "Option::is_none")]
571    pub temperature: Option<f32>,
572
573    /// The top-p value for the model (0.0 to 1.0)
574    ///
575    /// For each token generation step, the model considers the top_p percentage of
576    /// probability mass for potential token choices. Lower values are more selective,
577    /// higher values allow more variety.
578    #[serde(skip_serializing_if = "Option::is_none")]
579    pub top_p: Option<f32>,
580
581    /// The top-k value for the model
582    ///
583    /// For each token generation step, the model considers the top_k most likely tokens.
584    /// Lower values are more selective, higher values allow more variety.
585    #[serde(skip_serializing_if = "Option::is_none")]
586    pub top_k: Option<i32>,
587
588    /// The maximum number of tokens to generate
589    ///
590    /// Limits the length of the generated content. One token is roughly 4 characters.
591    #[serde(skip_serializing_if = "Option::is_none")]
592    pub max_output_tokens: Option<i32>,
593
594    /// The candidate count
595    ///
596    /// Number of alternative responses to generate.
597    #[serde(skip_serializing_if = "Option::is_none")]
598    pub candidate_count: Option<i32>,
599
600    /// Whether to stop on specific sequences
601    ///
602    /// The model will stop generating content when it encounters any of these sequences.
603    #[serde(skip_serializing_if = "Option::is_none")]
604    pub stop_sequences: Option<Vec<String>>,
605
606    /// The response mime type
607    ///
608    /// Specifies the format of the model's response.
609    #[serde(skip_serializing_if = "Option::is_none")]
610    pub response_mime_type: Option<String>,
611    /// The response schema
612    ///
613    /// Specifies the JSON schema for structured responses.
614    #[serde(skip_serializing_if = "Option::is_none")]
615    pub response_schema: Option<serde_json::Value>,
616
617    /// Response modalities (for TTS and other multimodal outputs)
618    #[serde(skip_serializing_if = "Option::is_none")]
619    pub response_modalities: Option<Vec<String>>,
620
621    /// Optional. Config for image generation. An error will be returned if this field is set for models
622    /// that don't support these config options.
623    #[serde(skip_serializing_if = "Option::is_none")]
624    pub image_config: Option<ImageConfig>,
625
626    /// Speech configuration for text-to-speech generation
627    #[serde(skip_serializing_if = "Option::is_none")]
628    pub speech_config: Option<SpeechConfig>,
629
630    /// The thinking configuration
631    ///
632    /// Configuration for the model's thinking process (Gemini 2.5 and Gemini 3 series).
633    #[serde(skip_serializing_if = "Option::is_none")]
634    pub thinking_config: Option<ThinkingConfig>,
635
636    /// Global media resolution for all images and PDFs.
637    /// Controls the resolution used for inline image and PDF data, affecting token usage.
638    /// Can be overridden per-part using the Part::InlineData media_resolution field.
639    #[serde(skip_serializing_if = "Option::is_none", rename = "media_resolution")]
640    pub media_resolution: Option<MediaResolutionLevel>,
641}
642
/// Response from the Gemini API for token counting
///
/// Returned by the `countTokens` endpoint.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CountTokensResponse {
    /// The total number of tokens counted across all instances.
    pub total_tokens: u32,
    /// The total number of tokens in the cached content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content_token_count: Option<u32>,
}
653
/// Config for image generation features.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct ImageConfig {
    /// Optional. The aspect ratio of the image to generate. Supported aspect ratios: 1:1, 2:3, 3:2, 3:4,
    /// 4:3, 9:16, 16:9, 21:9.
    ///
    /// If not specified, the model will choose a default aspect ratio based on any reference images
    /// provided.
    // Free-form string rather than an enum so new ratios don't require a
    // client update; the API validates the value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub aspect_ratio: Option<String>,
    /// Optional. Specifies the size of generated images. Supported values are `1K`, `2K`, `4K`. If not
    /// specified, the model will use default value `1K`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_size: Option<String>,
}
670
/// Configuration for speech generation (text-to-speech)
///
/// Set exactly one of `voice_config` (single voice) or
/// `multi_speaker_voice_config` (per-speaker voices); the constructors
/// [`SpeechConfig::single_voice`] and [`SpeechConfig::multi_speaker`]
/// enforce this.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeechConfig {
    /// Single voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice_config: Option<VoiceConfig>,
    /// Multi-speaker voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multi_speaker_voice_config: Option<MultiSpeakerVoiceConfig>,
}
682
/// Voice configuration for text-to-speech
///
/// Currently only prebuilt voices are modeled.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct VoiceConfig {
    /// Prebuilt voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prebuilt_voice_config: Option<PrebuiltVoiceConfig>,
}
691
/// Prebuilt voice configuration
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PrebuiltVoiceConfig {
    /// The name of the prebuilt voice to use (e.g. one of the API's named voices)
    pub voice_name: String,
}
699
/// Multi-speaker voice configuration
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MultiSpeakerVoiceConfig {
    /// Configuration for each speaker; speaker names must match those in the prompt
    pub speaker_voice_configs: Vec<SpeakerVoiceConfig>,
}
707
/// Configuration for a specific speaker in multi-speaker TTS
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeakerVoiceConfig {
    /// The name of the speaker (must match the name used in the prompt)
    pub speaker: String,
    /// Voice configuration for this speaker
    pub voice_config: VoiceConfig,
}
717
718impl SpeechConfig {
719    /// Create a new speech config with a single voice
720    pub fn single_voice(voice_name: impl Into<String>) -> Self {
721        Self {
722            voice_config: Some(VoiceConfig {
723                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
724                    voice_name: voice_name.into(),
725                }),
726            }),
727            multi_speaker_voice_config: None,
728        }
729    }
730
731    /// Create a new speech config with multiple speakers
732    pub fn multi_speaker(speakers: Vec<SpeakerVoiceConfig>) -> Self {
733        Self {
734            voice_config: None,
735            multi_speaker_voice_config: Some(MultiSpeakerVoiceConfig {
736                speaker_voice_configs: speakers,
737            }),
738        }
739    }
740}
741
742impl SpeakerVoiceConfig {
743    /// Create a new speaker voice configuration
744    pub fn new(speaker: impl Into<String>, voice_name: impl Into<String>) -> Self {
745        Self {
746            speaker: speaker.into(),
747            voice_config: VoiceConfig {
748                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
749                    voice_name: voice_name.into(),
750                }),
751            },
752        }
753    }
754}