//! Request/response model types for Gemini content generation.
//! Path: gemini_rust/generation/model.rs

1use reqwest::Url;
2use serde::{Deserialize, Serialize};
3use time::OffsetDateTime;
4
5use crate::{
6    safety::{SafetyRating, SafetySetting},
7    Content, Modality, Part,
8};
9
10/// Reason why generation finished
11#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
12#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
13pub enum FinishReason {
14    /// Default value. This value is unused.
15    FinishReasonUnspecified,
16    /// Natural stop point of the model or provided stop sequence.
17    Stop,
18    /// The maximum number of tokens as specified in the request was reached.
19    MaxTokens,
20    /// The response candidate content was flagged for safety reasons.
21    Safety,
22    /// The response candidate content was flagged for recitation reasons.
23    Recitation,
24    /// The response candidate content was flagged for using an unsupported language.
25    Language,
26    /// Unknown reason.
27    Other,
28    /// Token generation stopped because the content contains forbidden terms.
29    Blocklist,
30    /// Token generation stopped for potentially containing prohibited content.
31    ProhibitedContent,
32    /// Token generation stopped because the content potentially contains Sensitive Personally Identifiable Information (SPII).
33    Spii,
34    /// The function call generated by the model is invalid.
35    MalformedFunctionCall,
36    /// Token generation stopped because generated images contain safety violations.
37    ImageSafety,
38    /// Model generated a tool call but no tools were enabled in the request.
39    UnexpectedToolCall,
40    /// Model called too many tools consecutively, thus the system exited execution.
41    TooManyToolCalls,
42}
43
/// Citation metadata for content
///
/// Attached to a candidate when the generated text quotes or closely follows
/// identifiable source material.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CitationMetadata {
    /// The citation sources
    pub citation_sources: Vec<CitationSource>,
}
51
/// Citation source
///
/// Every field is optional; the API only populates what it knows about the
/// cited material.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CitationSource {
    /// The URI of the citation source
    pub uri: Option<String>,
    /// The title of the citation source
    pub title: Option<String>,
    /// The start index of the citation in the response
    pub start_index: Option<i32>,
    /// The end index of the citation in the response
    pub end_index: Option<i32>,
    /// The license of the citation source
    pub license: Option<String>,
    /// The publication date of the citation source
    // NOTE(review): parsed as an RFC 3339 timestamp; confirm the API sends
    // this format here rather than a plain {year, month, day} date object.
    #[serde(default, with = "time::serde::rfc3339::option")]
    pub publication_date: Option<OffsetDateTime>,
}
70
/// A candidate response
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct Candidate {
    /// The content of the candidate
    ///
    /// Falls back to `Content::default()` when the API omits it (e.g. a
    /// candidate that was blocked before any content was produced).
    #[serde(default)]
    pub content: Content,
    /// The safety ratings for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_ratings: Option<Vec<SafetyRating>>,
    /// The citation metadata for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub citation_metadata: Option<CitationMetadata>,
    /// The grounding metadata for the candidate
    ///
    /// Present only when a grounding tool (e.g. Google Search) was used.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grounding_metadata: Option<GroundingMetadata>,
    /// The finish reason for the candidate
    ///
    /// `None` while streaming a partial candidate.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<FinishReason>,
    /// The index of the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index: Option<i32>,
}
94
/// Metadata about token usage
///
/// All counts are optional because the API omits fields that do not apply to
/// a given request (or when processing failed before counting).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct UsageMetadata {
    /// The number of prompt tokens (null if request processing failed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_token_count: Option<i32>,
    /// The number of response tokens (null if generation failed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub candidates_token_count: Option<i32>,
    /// The total number of tokens (null if individual counts unavailable)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_token_count: Option<i32>,
    /// The number of thinking tokens (Gemini 2.5 series only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<i32>,
    /// Detailed prompt token information, broken down per modality
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_tokens_details: Option<Vec<PromptTokenDetails>>,
    /// The number of cached content tokens (batch API)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content_token_count: Option<i32>,
    /// Detailed cache token information (batch API), broken down per modality
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_tokens_details: Option<Vec<PromptTokenDetails>>,
}
121
/// Details about prompt tokens by modality
///
/// One entry per modality that contributed tokens to the prompt.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptTokenDetails {
    /// The modality (e.g., "TEXT")
    pub modality: Modality,
    /// Token count for this modality
    pub token_count: i32,
}
131
/// Grounding metadata for responses that use grounding tools
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingMetadata {
    /// Grounding chunks containing source information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grounding_chunks: Option<Vec<GroundingChunk>>,
    /// Grounding supports connecting response text to sources
    ///
    /// Each support maps a text segment to indices into `grounding_chunks`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grounding_supports: Option<Vec<GroundingSupport>>,
    /// Web search queries used for grounding
    #[serde(skip_serializing_if = "Option::is_none")]
    pub web_search_queries: Option<Vec<String>>,
    /// Google Maps widget context token
    #[serde(skip_serializing_if = "Option::is_none")]
    pub google_maps_widget_context_token: Option<String>,
}
149
/// A chunk of grounding information from a source
///
/// Exactly one of the fields is expected to be set, depending on whether the
/// source was Google Maps or the web.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingChunk {
    /// Maps-specific grounding information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub maps: Option<MapsGroundingChunk>,
    /// Web-specific grounding information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub web: Option<WebGroundingChunk>,
}
161
/// Maps-specific grounding chunk information
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MapsGroundingChunk {
    /// The URI of the Maps source
    pub uri: Url,
    /// The title of the Maps source
    pub title: String,
    /// The place ID from Google Maps
    #[serde(skip_serializing_if = "Option::is_none")]
    pub place_id: Option<String>,
}
174
/// Web-specific grounding chunk information
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct WebGroundingChunk {
    /// The URI of the web source
    pub uri: Url,
    /// The title of the web source
    pub title: String,
}
184
/// Support information connecting response text to grounding sources
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingSupport {
    /// Segment of the response text
    pub segment: GroundingSegment,
    /// Indices of grounding chunks that support this segment
    ///
    /// Indexes into `GroundingMetadata::grounding_chunks`.
    pub grounding_chunk_indices: Vec<u32>,
}
194
/// A segment of response text
// NOTE(review): whether the indices count bytes or characters is not
// established by this file — confirm against the API reference before
// slicing response text with them.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingSegment {
    /// Start index of the segment in the response text
    #[serde(skip_serializing_if = "Option::is_none")]
    pub start_index: Option<u32>,
    /// End index of the segment in the response text
    #[serde(skip_serializing_if = "Option::is_none")]
    pub end_index: Option<u32>,
    /// The text content of the segment
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
}
209
/// Response from the Gemini API for content generation
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GenerationResponse {
    /// The candidates generated
    ///
    /// Defaults to an empty vector when the API returns no candidates
    /// (e.g. when the prompt itself was blocked — see `prompt_feedback`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub candidates: Vec<Candidate>,
    /// The prompt feedback
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_feedback: Option<PromptFeedback>,
    /// Usage metadata
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage_metadata: Option<UsageMetadata>,
    /// Model version used
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_version: Option<String>,
    /// Response ID
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_id: Option<String>,
}
230
231/// Reason why content was blocked
232#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
233#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
234pub enum BlockReason {
235    /// Default value. This value is unused.
236    BlockReasonUnspecified,
237    /// Prompt was blocked due to safety reasons. Inspect safetyRatings to understand which safety category blocked it.
238    Safety,
239    /// Prompt was blocked due to unknown reasons.
240    Other,
241    /// Prompt was blocked due to the terms which are included from the terminology blocklist.
242    Blocklist,
243    /// Prompt was blocked due to prohibited content.
244    ProhibitedContent,
245    /// Candidates blocked due to unsafe image generation content.
246    ImageSafety,
247}
248
/// Feedback about the prompt
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptFeedback {
    /// The safety ratings for the prompt
    ///
    /// Defaults to an empty vector when the API omits the field.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub safety_ratings: Vec<SafetyRating>,
    /// The block reason if the prompt was blocked
    ///
    /// `None` means the prompt was not blocked.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub block_reason: Option<BlockReason>,
}
260
261impl GenerationResponse {
262    /// Get the text of the first candidate
263    pub fn text(&self) -> String {
264        self.candidates
265            .first()
266            .and_then(|c| {
267                c.content.parts.as_ref().and_then(|parts| {
268                    parts.first().and_then(|p| match p {
269                        Part::Text {
270                            text,
271                            thought: _,
272                            thought_signature: _,
273                        } => Some(text.clone()),
274                        _ => None,
275                    })
276                })
277            })
278            .unwrap_or_default()
279    }
280
281    /// Get function calls from the response
282    pub fn function_calls(&self) -> Vec<&crate::tools::FunctionCall> {
283        self.candidates
284            .iter()
285            .flat_map(|c| {
286                c.content
287                    .parts
288                    .as_ref()
289                    .map(|parts| {
290                        parts
291                            .iter()
292                            .filter_map(|p| match p {
293                                Part::FunctionCall {
294                                    function_call,
295                                    thought_signature: _,
296                                } => Some(function_call),
297                                _ => None,
298                            })
299                            .collect::<Vec<_>>()
300                    })
301                    .unwrap_or_default()
302            })
303            .collect()
304    }
305
306    /// Get function calls with their thought signatures from the response
307    pub fn function_calls_with_thoughts(
308        &self,
309    ) -> Vec<(&crate::tools::FunctionCall, Option<&String>)> {
310        self.candidates
311            .iter()
312            .flat_map(|c| {
313                c.content
314                    .parts
315                    .as_ref()
316                    .map(|parts| {
317                        parts
318                            .iter()
319                            .filter_map(|p| match p {
320                                Part::FunctionCall {
321                                    function_call,
322                                    thought_signature,
323                                } => Some((function_call, thought_signature.as_ref())),
324                                _ => None,
325                            })
326                            .collect::<Vec<_>>()
327                    })
328                    .unwrap_or_default()
329            })
330            .collect()
331    }
332
333    /// Get thought summaries from the response
334    pub fn thoughts(&self) -> Vec<String> {
335        self.candidates
336            .iter()
337            .flat_map(|c| {
338                c.content
339                    .parts
340                    .as_ref()
341                    .map(|parts| {
342                        parts
343                            .iter()
344                            .filter_map(|p| match p {
345                                Part::Text {
346                                    text,
347                                    thought: Some(true),
348                                    thought_signature: _,
349                                } => Some(text.clone()),
350                                _ => None,
351                            })
352                            .collect::<Vec<_>>()
353                    })
354                    .unwrap_or_default()
355            })
356            .collect()
357    }
358
359    /// Get all text parts (both regular text and thoughts)
360    pub fn all_text(&self) -> Vec<(String, bool)> {
361        self.candidates
362            .iter()
363            .flat_map(|c| {
364                c.content
365                    .parts
366                    .as_ref()
367                    .map(|parts| {
368                        parts
369                            .iter()
370                            .filter_map(|p| match p {
371                                Part::Text {
372                                    text,
373                                    thought,
374                                    thought_signature: _,
375                                } => Some((text.clone(), thought.unwrap_or(false))),
376                                _ => None,
377                            })
378                            .collect::<Vec<_>>()
379                    })
380                    .unwrap_or_default()
381            })
382            .collect()
383    }
384
385    /// Get text parts with their thought signatures from the response
386    pub fn text_with_thoughts(&self) -> Vec<(String, bool, Option<&String>)> {
387        self.candidates
388            .iter()
389            .flat_map(|c| {
390                c.content
391                    .parts
392                    .as_ref()
393                    .map(|parts| {
394                        parts
395                            .iter()
396                            .filter_map(|p| match p {
397                                Part::Text {
398                                    text,
399                                    thought,
400                                    thought_signature,
401                                } => Some((
402                                    text.clone(),
403                                    thought.unwrap_or(false),
404                                    thought_signature.as_ref(),
405                                )),
406                                _ => None,
407                            })
408                            .collect::<Vec<_>>()
409                    })
410                    .unwrap_or_default()
411            })
412            .collect()
413    }
414}
415
/// Request to generate content
///
/// Optional fields marked `skip_serializing_if` are omitted from the JSON
/// body entirely when unset, so the API applies its own defaults.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerateContentRequest {
    /// The contents to generate content from
    pub contents: Vec<Content>,
    /// The generation config
    #[serde(skip_serializing_if = "Option::is_none")]
    pub generation_config: Option<GenerationConfig>,
    /// The safety settings
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_settings: Option<Vec<SafetySetting>>,
    /// The tools that the model can use
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<crate::tools::Tool>>,
    /// The tool config
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_config: Option<crate::tools::ToolConfig>,
    /// The system instruction
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_instruction: Option<Content>,
    /// The cached content to use
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content: Option<String>,
}
441
/// Thinking level for Gemini 3 series models
///
/// Controls the depth of reasoning and analysis that the model applies.
/// Serialized in SCREAMING_SNAKE_CASE (e.g. `"HIGH"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum ThinkingLevel {
    /// Unspecified thinking level (uses model default)
    ThinkingLevelUnspecified,
    /// Minimal thinking level - fastest responses with minimal reasoning
    Minimal,
    /// Low thinking level - faster responses with less reasoning
    Low,
    /// Medium thinking level - balanced reasoning depth
    Medium,
    /// High thinking level - deeper analysis with more comprehensive reasoning
    High,
}
459
/// Media resolution level for images and PDFs
///
/// Controls the resolution used when processing inline images and PDF documents,
/// which affects both quality and token consumption.
///
/// Note that the variant names already carry the `MediaResolution` prefix, so
/// they serialize as e.g. `"MEDIA_RESOLUTION_LOW"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum MediaResolutionLevel {
    /// Unspecified resolution (uses model default)
    MediaResolutionUnspecified,
    /// Low resolution - uses fewer tokens, lower quality
    MediaResolutionLow,
    /// Medium resolution - balanced token usage and quality
    MediaResolutionMedium,
    /// High resolution - uses more tokens, higher quality
    MediaResolutionHigh,
}
476
/// Wrapper struct for per-part media resolution.
/// Allows fine-grained control over the resolution used for individual inline images and PDFs.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct MediaResolution {
    /// The media resolution level to use
    pub level: MediaResolutionLevel,
}
484
/// Configuration for thinking (Gemini 2.5 and Gemini 3 series)
///
/// - For Gemini 2.5 models, use `thinking_budget` and `include_thoughts`.
/// - For Gemini 3 models, use `thinking_level` (mutually exclusive with `thinking_budget`).
///
/// The mutual exclusivity of `thinking_budget` and `thinking_level` is not
/// enforced by this struct; callers must set only one of them.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ThinkingConfig {
    /// The thinking budget (number of thinking tokens)
    ///
    /// - Set to 0 to disable thinking
    /// - Set to -1 for dynamic thinking (model decides)
    /// - Set to a positive number for a specific token budget
    ///
    /// Model-specific ranges:
    /// - 2.5 Pro: 128 to 32768 (cannot disable thinking)
    /// - 2.5 Flash: 0 to 24576
    /// - 2.5 Flash Lite: 512 to 24576
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,

    /// Whether to include thought summaries in the response
    ///
    /// When enabled, the response will include synthesized versions of the model's
    /// raw thoughts, providing insights into the reasoning process.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level (Required for Gemini 3)
    ///
    /// Gemini 3 uses thinking_level (Low/High) which is mutually exclusive with thinking_budget
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,
}
518
519impl ThinkingConfig {
520    /// Create a new thinking config with default settings
521    pub fn new() -> Self {
522        Self {
523            thinking_budget: None,
524            include_thoughts: None,
525            thinking_level: None,
526        }
527    }
528
529    /// Set the thinking budget
530    pub fn with_thinking_budget(mut self, budget: i32) -> Self {
531        self.thinking_budget = Some(budget);
532        self
533    }
534
535    /// Enable dynamic thinking (model decides the budget)
536    pub fn with_dynamic_thinking(mut self) -> Self {
537        self.thinking_budget = Some(-1);
538        self
539    }
540
541    /// Include thought summaries in the response
542    pub fn with_thoughts_included(mut self, include: bool) -> Self {
543        self.include_thoughts = Some(include);
544        self
545    }
546
547    /// Set the thinking level (Required for Gemini 3)
548    pub fn with_thinking_level(mut self, level: ThinkingLevel) -> Self {
549        self.thinking_level = Some(level);
550        self
551    }
552
553    /// Create a thinking config that enables dynamic thinking with thoughts included
554    pub fn dynamic_thinking() -> Self {
555        Self {
556            thinking_budget: Some(-1),
557            include_thoughts: Some(true),
558            thinking_level: None,
559        }
560    }
561}
562
impl Default for ThinkingConfig {
    /// Equivalent to [`ThinkingConfig::new`]: all options unset.
    fn default() -> Self {
        Self::new()
    }
}
568
/// Configuration for generation
///
/// All fields are optional; unset fields are omitted from the request so the
/// API applies its own defaults.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerationConfig {
    /// The temperature for the model (0.0 to 1.0)
    ///
    /// Controls the randomness of the output. Higher values (e.g., 0.9) make output
    /// more random, lower values (e.g., 0.1) make output more deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The top-p value for the model (0.0 to 1.0)
    ///
    /// For each token generation step, the model considers the top_p percentage of
    /// probability mass for potential token choices. Lower values are more selective,
    /// higher values allow more variety.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// The top-k value for the model
    ///
    /// For each token generation step, the model considers the top_k most likely tokens.
    /// Lower values are more selective, higher values allow more variety.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<i32>,

    /// Seed used in decoding.
    ///
    /// By default, the model uses a random value for each request if a seed is not provided.
    /// Setting a specific seed, along with consistent values for other parameters like temperature, can make the model return the same response for repeated requests with the same input.
    /// Identical outputs are not guaranteed across all runs, due to backend infrastructure variations, but it provides a "best effort" for reproducibility.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,

    /// The maximum number of tokens to generate
    ///
    /// Limits the length of the generated content. One token is roughly 4 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<i32>,

    /// The candidate count
    ///
    /// Number of alternative responses to generate.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub candidate_count: Option<i32>,

    /// Whether to stop on specific sequences
    ///
    /// The model will stop generating content when it encounters any of these sequences.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_sequences: Option<Vec<String>>,

    /// The response mime type
    ///
    /// Specifies the format of the model's response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_mime_type: Option<String>,
    /// The response schema
    ///
    /// Specifies the JSON schema for structured responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_schema: Option<serde_json::Value>,

    /// Response modalities (for TTS and other multimodal outputs)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_modalities: Option<Vec<String>>,

    /// Optional. Config for image generation. An error will be returned if this field is set for models
    /// that don't support these config options.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_config: Option<ImageConfig>,

    /// Speech configuration for text-to-speech generation
    #[serde(skip_serializing_if = "Option::is_none")]
    pub speech_config: Option<SpeechConfig>,

    /// The thinking configuration
    ///
    /// Configuration for the model's thinking process (Gemini 2.5 and Gemini 3 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,

    /// Global media resolution for all images and PDFs.
    /// Controls the resolution used for inline image and PDF data, affecting token usage.
    /// Can be overridden per-part using the Part::InlineData media_resolution field.
    // NOTE(review): the explicit rename forces snake_case "media_resolution" on
    // the wire, overriding this struct's camelCase rename_all — confirm the API
    // expects this casing rather than "mediaResolution".
    #[serde(skip_serializing_if = "Option::is_none", rename = "media_resolution")]
    pub media_resolution: Option<MediaResolutionLevel>,
}
657
/// Response from the Gemini API for token counting
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CountTokensResponse {
    /// The total number of tokens counted across all instances.
    pub total_tokens: u32,
    /// The total number of tokens in the cached content.
    ///
    /// Only present when the request referenced cached content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content_token_count: Option<u32>,
}
668
/// Config for image generation features.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct ImageConfig {
    /// Optional. The aspect ratio of the image to generate. Supported aspect ratios: 1:1, 2:3, 3:2, 3:4,
    /// 4:3, 9:16, 16:9, 21:9.
    ///
    /// If not specified, the model will choose a default aspect ratio based on any reference images
    /// provided.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub aspect_ratio: Option<String>,
    /// Optional. Specifies the size of generated images. Supported values are `1K`, `2K`, `4K`. If not
    /// specified, the model will use default value `1K`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_size: Option<String>,
}
685
/// Configuration for speech generation (text-to-speech)
///
/// Set exactly one of the two fields; the constructors
/// `SpeechConfig::single_voice` and `SpeechConfig::multi_speaker` enforce this.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeechConfig {
    /// Single voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice_config: Option<VoiceConfig>,
    /// Multi-speaker voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multi_speaker_voice_config: Option<MultiSpeakerVoiceConfig>,
}
697
/// Voice configuration for text-to-speech
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct VoiceConfig {
    /// Prebuilt voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prebuilt_voice_config: Option<PrebuiltVoiceConfig>,
}
706
/// Prebuilt voice configuration
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PrebuiltVoiceConfig {
    /// The name of the voice to use
    pub voice_name: String,
}
714
/// Multi-speaker voice configuration
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MultiSpeakerVoiceConfig {
    /// Configuration for each speaker
    pub speaker_voice_configs: Vec<SpeakerVoiceConfig>,
}
722
/// Configuration for a specific speaker in multi-speaker TTS
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeakerVoiceConfig {
    /// The name of the speaker (must match the name used in the prompt)
    pub speaker: String,
    /// Voice configuration for this speaker
    pub voice_config: VoiceConfig,
}
732
733impl SpeechConfig {
734    /// Create a new speech config with a single voice
735    pub fn single_voice(voice_name: impl Into<String>) -> Self {
736        Self {
737            voice_config: Some(VoiceConfig {
738                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
739                    voice_name: voice_name.into(),
740                }),
741            }),
742            multi_speaker_voice_config: None,
743        }
744    }
745
746    /// Create a new speech config with multiple speakers
747    pub fn multi_speaker(speakers: Vec<SpeakerVoiceConfig>) -> Self {
748        Self {
749            voice_config: None,
750            multi_speaker_voice_config: Some(MultiSpeakerVoiceConfig {
751                speaker_voice_configs: speakers,
752            }),
753        }
754    }
755}
756
757impl SpeakerVoiceConfig {
758    /// Create a new speaker voice configuration
759    pub fn new(speaker: impl Into<String>, voice_name: impl Into<String>) -> Self {
760        Self {
761            speaker: speaker.into(),
762            voice_config: VoiceConfig {
763                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
764                    voice_name: voice_name.into(),
765                }),
766            },
767        }
768    }
769}