//! `gemini_rust/generation/model.rs`
//!
//! Request and response model types for the Gemini content-generation API.

1use reqwest::Url;
2use serde::{Deserialize, Serialize};
3use time::OffsetDateTime;
4
5use crate::{
6    safety::{SafetyRating, SafetySetting},
7    Content, Modality, Part,
8};
9
10/// Reason why generation finished
11#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
12#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
13pub enum FinishReason {
14    /// Default value. This value is unused.
15    FinishReasonUnspecified,
16    /// Natural stop point of the model or provided stop sequence.
17    Stop,
18    /// The maximum number of tokens as specified in the request was reached.
19    MaxTokens,
20    /// The response candidate content was flagged for safety reasons.
21    Safety,
22    /// The response candidate content was flagged for recitation reasons.
23    Recitation,
24    /// The response candidate content was flagged for using an unsupported language.
25    Language,
26    /// Unknown reason.
27    Other,
28    /// Token generation stopped because the content contains forbidden terms.
29    Blocklist,
30    /// Token generation stopped for potentially containing prohibited content.
31    ProhibitedContent,
32    /// Token generation stopped because the content potentially contains Sensitive Personally Identifiable Information (SPII).
33    Spii,
34    /// The function call generated by the model is invalid.
35    MalformedFunctionCall,
36    /// Token generation stopped because generated images contain safety violations.
37    ImageSafety,
38    /// Model generated a tool call but no tools were enabled in the request.
39    UnexpectedToolCall,
40    /// Model called too many tools consecutively, thus the system exited execution.
41    TooManyToolCalls,
42}
43
/// Citation metadata for content
///
/// Wraps the list of sources the model cited while producing a candidate.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CitationMetadata {
    /// The citation sources
    pub citation_sources: Vec<CitationSource>,
}
51
/// Citation source
///
/// All fields are optional; the API may return any subset of them.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CitationSource {
    /// The URI of the citation source
    pub uri: Option<String>,
    /// The title of the citation source
    pub title: Option<String>,
    /// The start index of the citation in the response
    pub start_index: Option<i32>,
    /// The end index of the citation in the response
    pub end_index: Option<i32>,
    /// The license of the citation source
    pub license: Option<String>,
    /// The publication date of the citation source
    ///
    /// Parsed as an RFC 3339 timestamp; `default` makes a missing field
    /// deserialize to `None`. NOTE(review): confirm the live API actually
    /// emits an RFC 3339 string here rather than a structured date object.
    #[serde(default, with = "time::serde::rfc3339::option")]
    pub publication_date: Option<OffsetDateTime>,
}
70
/// A candidate response
///
/// One alternative completion produced by the model; a response may carry
/// several candidates when more than one is requested.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct Candidate {
    /// The content of the candidate
    ///
    /// Falls back to `Content::default()` when the API omits the field
    /// (presumably for blocked/empty candidates — confirm against live data).
    #[serde(default)]
    pub content: Content,
    /// The safety ratings for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_ratings: Option<Vec<SafetyRating>>,
    /// The citation metadata for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub citation_metadata: Option<CitationMetadata>,
    /// The grounding metadata for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grounding_metadata: Option<GroundingMetadata>,
    /// The finish reason for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<FinishReason>,
    /// The index of the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index: Option<i32>,
}
94
/// Metadata about token usage
///
/// Every count is optional because the API omits counts that could not be
/// computed (e.g. when request processing or generation failed).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct UsageMetadata {
    /// The number of prompt tokens (null if request processing failed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_token_count: Option<i32>,
    /// The number of response tokens (null if generation failed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub candidates_token_count: Option<i32>,
    /// The total number of tokens (null if individual counts unavailable)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_token_count: Option<i32>,
    /// The number of thinking tokens (Gemini 2.5 series only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<i32>,
    /// Detailed prompt token information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_tokens_details: Option<Vec<PromptTokenDetails>>,
    /// The number of cached content tokens (batch API)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content_token_count: Option<i32>,
    /// Detailed cache token information (batch API)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_tokens_details: Option<Vec<PromptTokenDetails>>,
}
121
/// Details about prompt tokens by modality
///
/// One entry per modality present in the prompt (used by both
/// `prompt_tokens_details` and `cache_tokens_details`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptTokenDetails {
    /// The modality (e.g., "TEXT")
    pub modality: Modality,
    /// Token count for this modality
    pub token_count: i32,
}
131
/// Grounding metadata for responses that use grounding tools
///
/// Present on a candidate when a grounding tool (e.g. web search, Maps)
/// contributed to the response.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingMetadata {
    /// Grounding chunks containing source information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grounding_chunks: Option<Vec<GroundingChunk>>,
    /// Grounding supports connecting response text to sources
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grounding_supports: Option<Vec<GroundingSupport>>,
    /// Web search queries used for grounding
    #[serde(skip_serializing_if = "Option::is_none")]
    pub web_search_queries: Option<Vec<String>>,
    /// Google Maps widget context token
    #[serde(skip_serializing_if = "Option::is_none")]
    pub google_maps_widget_context_token: Option<String>,
}
149
/// A chunk of grounding information from a source
///
/// Exactly one of the variants is expected to be populated per chunk
/// (Maps or web) — the struct models this as two optionals.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingChunk {
    /// Maps-specific grounding information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub maps: Option<MapsGroundingChunk>,
    /// Web-specific grounding information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub web: Option<WebGroundingChunk>,
}
161
/// Maps-specific grounding chunk information
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MapsGroundingChunk {
    /// The URI of the Maps source
    ///
    /// Parsed as a `reqwest::Url`, so deserialization fails on invalid URIs.
    pub uri: Url,
    /// The title of the Maps source
    pub title: String,
    /// The place ID from Google Maps
    #[serde(skip_serializing_if = "Option::is_none")]
    pub place_id: Option<String>,
}
174
/// Web-specific grounding chunk information
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct WebGroundingChunk {
    /// The URI of the web source
    ///
    /// Parsed as a `reqwest::Url`, so deserialization fails on invalid URIs.
    pub uri: Url,
    /// The title of the web source
    pub title: String,
}
184
/// Support information connecting response text to grounding sources
///
/// Ties a span of the response text to the `grounding_chunks` entries
/// (by index) that back it up.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingSupport {
    /// Segment of the response text
    pub segment: GroundingSegment,
    /// Indices of grounding chunks that support this segment
    pub grounding_chunk_indices: Vec<u32>,
}
194
/// A segment of response text
///
/// Indices are offsets into the response text; the exact unit (bytes vs.
/// characters) is not established here — confirm against the API reference.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GroundingSegment {
    /// Start index of the segment in the response text
    #[serde(skip_serializing_if = "Option::is_none")]
    pub start_index: Option<u32>,
    /// End index of the segment in the response text
    #[serde(skip_serializing_if = "Option::is_none")]
    pub end_index: Option<u32>,
    /// The text content of the segment
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
}
209
/// Response from the Gemini API for content generation
///
/// `candidates` defaults to an empty vector when the field is absent
/// (e.g. when the prompt was blocked and only `prompt_feedback` is set).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GenerationResponse {
    /// The candidates generated
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub candidates: Vec<Candidate>,
    /// The prompt feedback
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_feedback: Option<PromptFeedback>,
    /// Usage metadata
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage_metadata: Option<UsageMetadata>,
    /// Model version used
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_version: Option<String>,
    /// Response ID
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_id: Option<String>,
}
230
231/// Reason why content was blocked
232#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
233#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
234pub enum BlockReason {
235    /// Default value. This value is unused.
236    BlockReasonUnspecified,
237    /// Prompt was blocked due to safety reasons. Inspect safetyRatings to understand which safety category blocked it.
238    Safety,
239    /// Prompt was blocked due to unknown reasons.
240    Other,
241    /// Prompt was blocked due to the terms which are included from the terminology blocklist.
242    Blocklist,
243    /// Prompt was blocked due to prohibited content.
244    ProhibitedContent,
245    /// Candidates blocked due to unsafe image generation content.
246    ImageSafety,
247}
248
/// Feedback about the prompt
///
/// Returned when the prompt itself was evaluated (and possibly blocked),
/// independently of any candidates.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptFeedback {
    /// The safety ratings for the prompt
    ///
    /// Defaults to an empty vector when the field is absent.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub safety_ratings: Vec<SafetyRating>,
    /// The block reason if the prompt was blocked
    #[serde(skip_serializing_if = "Option::is_none")]
    pub block_reason: Option<BlockReason>,
}
260
261impl GenerationResponse {
262    /// Get the text of the first candidate
263    pub fn text(&self) -> String {
264        self.candidates
265            .first()
266            .and_then(|c| {
267                c.content.parts.as_ref().and_then(|parts| {
268                    parts.first().and_then(|p| match p {
269                        Part::Text {
270                            text,
271                            thought: _,
272                            thought_signature: _,
273                        } => Some(text.clone()),
274                        _ => None,
275                    })
276                })
277            })
278            .unwrap_or_default()
279    }
280
281    /// Get function calls from the response
282    pub fn function_calls(&self) -> Vec<&crate::tools::FunctionCall> {
283        self.candidates
284            .iter()
285            .flat_map(|c| {
286                c.content
287                    .parts
288                    .as_ref()
289                    .map(|parts| {
290                        parts
291                            .iter()
292                            .filter_map(|p| match p {
293                                Part::FunctionCall {
294                                    function_call,
295                                    thought_signature: _,
296                                } => Some(function_call),
297                                _ => None,
298                            })
299                            .collect::<Vec<_>>()
300                    })
301                    .unwrap_or_default()
302            })
303            .collect()
304    }
305
306    /// Get function calls with their thought signatures from the response
307    pub fn function_calls_with_thoughts(
308        &self,
309    ) -> Vec<(&crate::tools::FunctionCall, Option<&String>)> {
310        self.candidates
311            .iter()
312            .flat_map(|c| {
313                c.content
314                    .parts
315                    .as_ref()
316                    .map(|parts| {
317                        parts
318                            .iter()
319                            .filter_map(|p| match p {
320                                Part::FunctionCall {
321                                    function_call,
322                                    thought_signature,
323                                } => Some((function_call, thought_signature.as_ref())),
324                                _ => None,
325                            })
326                            .collect::<Vec<_>>()
327                    })
328                    .unwrap_or_default()
329            })
330            .collect()
331    }
332
333    /// Get thought summaries from the response
334    pub fn thoughts(&self) -> Vec<String> {
335        self.candidates
336            .iter()
337            .flat_map(|c| {
338                c.content
339                    .parts
340                    .as_ref()
341                    .map(|parts| {
342                        parts
343                            .iter()
344                            .filter_map(|p| match p {
345                                Part::Text {
346                                    text,
347                                    thought: Some(true),
348                                    thought_signature: _,
349                                } => Some(text.clone()),
350                                _ => None,
351                            })
352                            .collect::<Vec<_>>()
353                    })
354                    .unwrap_or_default()
355            })
356            .collect()
357    }
358
359    /// Get all text parts (both regular text and thoughts)
360    pub fn all_text(&self) -> Vec<(String, bool)> {
361        self.candidates
362            .iter()
363            .flat_map(|c| {
364                c.content
365                    .parts
366                    .as_ref()
367                    .map(|parts| {
368                        parts
369                            .iter()
370                            .filter_map(|p| match p {
371                                Part::Text {
372                                    text,
373                                    thought,
374                                    thought_signature: _,
375                                } => Some((text.clone(), thought.unwrap_or(false))),
376                                _ => None,
377                            })
378                            .collect::<Vec<_>>()
379                    })
380                    .unwrap_or_default()
381            })
382            .collect()
383    }
384
385    /// Get text parts with their thought signatures from the response
386    pub fn text_with_thoughts(&self) -> Vec<(String, bool, Option<&String>)> {
387        self.candidates
388            .iter()
389            .flat_map(|c| {
390                c.content
391                    .parts
392                    .as_ref()
393                    .map(|parts| {
394                        parts
395                            .iter()
396                            .filter_map(|p| match p {
397                                Part::Text {
398                                    text,
399                                    thought,
400                                    thought_signature,
401                                } => Some((
402                                    text.clone(),
403                                    thought.unwrap_or(false),
404                                    thought_signature.as_ref(),
405                                )),
406                                _ => None,
407                            })
408                            .collect::<Vec<_>>()
409                    })
410                    .unwrap_or_default()
411            })
412            .collect()
413    }
414}
415
/// Request to generate content
///
/// All optional fields are omitted from the serialized JSON when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerateContentRequest {
    /// The contents to generate content from
    pub contents: Vec<Content>,
    /// The generation config
    #[serde(skip_serializing_if = "Option::is_none")]
    pub generation_config: Option<GenerationConfig>,
    /// The safety settings
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_settings: Option<Vec<SafetySetting>>,
    /// The tools that the model can use
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<crate::tools::Tool>>,
    /// The tool config
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_config: Option<crate::tools::ToolConfig>,
    /// The system instruction
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_instruction: Option<Content>,
    /// The cached content to use
    ///
    /// Name/handle of previously cached content; the exact format is defined
    /// by the caching API, not here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content: Option<String>,
}
441
/// Thinking level for Gemini 3 Pro models
///
/// Controls the depth of reasoning and analysis the model applies.
/// Serialized in SCREAMING_SNAKE_CASE (e.g. `LOW`, `HIGH`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum ThinkingLevel {
    /// Unspecified thinking level (uses model default)
    ThinkingLevelUnspecified,
    /// Low thinking level - faster responses with less reasoning
    Low,
    /// High thinking level - deeper analysis with more comprehensive reasoning
    High,
}
455
/// Media resolution level for images and PDFs
///
/// Controls the resolution used when processing inline images and PDF documents,
/// which affects both quality and token consumption.
/// Serialized in SCREAMING_SNAKE_CASE (e.g. `MEDIA_RESOLUTION_LOW`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum MediaResolutionLevel {
    /// Unspecified resolution (uses model default)
    MediaResolutionUnspecified,
    /// Low resolution - uses fewer tokens, lower quality
    MediaResolutionLow,
    /// Medium resolution - balanced token usage and quality
    MediaResolutionMedium,
    /// High resolution - uses more tokens, higher quality
    MediaResolutionHigh,
}
472
/// Wrapper struct for per-part media resolution.
/// Allows fine-grained control over the resolution used for individual inline images and PDFs.
///
/// Note: no `rename_all` here — the single field serializes as `level`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct MediaResolution {
    /// The media resolution level to use
    pub level: MediaResolutionLevel,
}
480
/// Configuration for thinking (Gemini 2.5 and Gemini 3 series)
///
/// - For Gemini 2.5 models, use `thinking_budget` and `include_thoughts`.
/// - For Gemini 3 models, use `thinking_level` (mutually exclusive with `thinking_budget`).
///
/// Unset fields are omitted from the serialized JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ThinkingConfig {
    /// The thinking budget (number of thinking tokens)
    ///
    /// - Set to 0 to disable thinking
    /// - Set to -1 for dynamic thinking (model decides)
    /// - Set to a positive number for a specific token budget
    ///
    /// Model-specific ranges:
    /// - 2.5 Pro: 128 to 32768 (cannot disable thinking)
    /// - 2.5 Flash: 0 to 24576
    /// - 2.5 Flash Lite: 512 to 24576
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,

    /// Whether to include thought summaries in the response
    ///
    /// When enabled, the response will include synthesized versions of the model's
    /// raw thoughts, providing insights into the reasoning process.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level (Required for Gemini 3)
    ///
    /// Gemini 3 uses thinking_level (Low/High) which is mutually exclusive with thinking_budget
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,
}
514
515impl ThinkingConfig {
516    /// Create a new thinking config with default settings
517    pub fn new() -> Self {
518        Self {
519            thinking_budget: None,
520            include_thoughts: None,
521            thinking_level: None,
522        }
523    }
524
525    /// Set the thinking budget
526    pub fn with_thinking_budget(mut self, budget: i32) -> Self {
527        self.thinking_budget = Some(budget);
528        self
529    }
530
531    /// Enable dynamic thinking (model decides the budget)
532    pub fn with_dynamic_thinking(mut self) -> Self {
533        self.thinking_budget = Some(-1);
534        self
535    }
536
537    /// Include thought summaries in the response
538    pub fn with_thoughts_included(mut self, include: bool) -> Self {
539        self.include_thoughts = Some(include);
540        self
541    }
542
543    /// Set the thinking level (Required for Gemini 3)
544    pub fn with_thinking_level(mut self, level: ThinkingLevel) -> Self {
545        self.thinking_level = Some(level);
546        self
547    }
548
549    /// Create a thinking config that enables dynamic thinking with thoughts included
550    pub fn dynamic_thinking() -> Self {
551        Self {
552            thinking_budget: Some(-1),
553            include_thoughts: Some(true),
554            thinking_level: None,
555        }
556    }
557}
558
559impl Default for ThinkingConfig {
560    fn default() -> Self {
561        Self::new()
562    }
563}
564
/// Configuration for generation
///
/// Unset fields are omitted from the serialized JSON, letting the API apply
/// its own defaults.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerationConfig {
    /// The temperature for the model (0.0 to 1.0)
    ///
    /// Controls the randomness of the output. Higher values (e.g., 0.9) make output
    /// more random, lower values (e.g., 0.1) make output more deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The top-p value for the model (0.0 to 1.0)
    ///
    /// For each token generation step, the model considers the top_p percentage of
    /// probability mass for potential token choices. Lower values are more selective,
    /// higher values allow more variety.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// The top-k value for the model
    ///
    /// For each token generation step, the model considers the top_k most likely tokens.
    /// Lower values are more selective, higher values allow more variety.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<i32>,

    /// Seed used in decoding.
    ///
    /// By default, the model uses a random value for each request if a seed is not provided.
    /// Setting a specific seed, along with consistent values for other parameters like temperature, can make the model return the same response for repeated requests with the same input.
    /// Identical outputs are not guaranteed across all runs, due to backend infrastructure variations, but it provides a "best effort" for reproducibility.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,

    /// The maximum number of tokens to generate
    ///
    /// Limits the length of the generated content. One token is roughly 4 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<i32>,

    /// The candidate count
    ///
    /// Number of alternative responses to generate.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub candidate_count: Option<i32>,

    /// Whether to stop on specific sequences
    ///
    /// The model will stop generating content when it encounters any of these sequences.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_sequences: Option<Vec<String>>,

    /// The response mime type
    ///
    /// Specifies the format of the model's response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_mime_type: Option<String>,
    /// The response schema
    ///
    /// Specifies the JSON schema for structured responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_schema: Option<serde_json::Value>,

    /// Response modalities (for TTS and other multimodal outputs)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_modalities: Option<Vec<String>>,

    /// Optional. Config for image generation. An error will be returned if this field is set for models
    /// that don't support these config options.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_config: Option<ImageConfig>,

    /// Speech configuration for text-to-speech generation
    #[serde(skip_serializing_if = "Option::is_none")]
    pub speech_config: Option<SpeechConfig>,

    /// The thinking configuration
    ///
    /// Configuration for the model's thinking process (Gemini 2.5 and Gemini 3 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,

    /// Global media resolution for all images and PDFs.
    /// Controls the resolution used for inline image and PDF data, affecting token usage.
    /// Can be overridden per-part using the Part::InlineData media_resolution field.
    ///
    /// NOTE(review): the explicit `rename = "media_resolution"` overrides the
    /// struct-level camelCase rename, so this field serializes as snake_case
    /// while every other field is camelCase. Proto-JSON endpoints generally
    /// accept both spellings of a field name, but confirm this is intentional
    /// rather than a typo for `mediaResolution`.
    #[serde(skip_serializing_if = "Option::is_none", rename = "media_resolution")]
    pub media_resolution: Option<MediaResolutionLevel>,
}
653
/// Response from the Gemini API for token counting
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CountTokensResponse {
    /// The total number of tokens counted across all instances.
    pub total_tokens: u32,
    /// The total number of tokens in the cached content.
    ///
    /// Absent when no cached content was involved.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content_token_count: Option<u32>,
}
664
/// Config for image generation features.
///
/// Both fields are free-form strings validated server-side.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct ImageConfig {
    /// Optional. The aspect ratio of the image to generate. Supported aspect ratios: 1:1, 2:3, 3:2, 3:4,
    /// 4:3, 9:16, 16:9, 21:9.
    ///
    /// If not specified, the model will choose a default aspect ratio based on any reference images
    /// provided.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub aspect_ratio: Option<String>,
    /// Optional. Specifies the size of generated images. Supported values are `1K`, `2K`, `4K`. If not
    /// specified, the model will use default value `1K`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_size: Option<String>,
}
681
/// Configuration for speech generation (text-to-speech)
///
/// Use [`SpeechConfig::single_voice`] or [`SpeechConfig::multi_speaker`] to
/// construct; the two fields are alternatives, not meant to be set together.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeechConfig {
    /// Single voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice_config: Option<VoiceConfig>,
    /// Multi-speaker voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multi_speaker_voice_config: Option<MultiSpeakerVoiceConfig>,
}
693
/// Voice configuration for text-to-speech
///
/// Currently only prebuilt voices are modeled here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct VoiceConfig {
    /// Prebuilt voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prebuilt_voice_config: Option<PrebuiltVoiceConfig>,
}
702
/// Prebuilt voice configuration
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PrebuiltVoiceConfig {
    /// The name of the voice to use
    ///
    /// Must be one of the voice names the API recognizes (validated server-side).
    pub voice_name: String,
}
710
/// Multi-speaker voice configuration
///
/// One entry per distinct speaker referenced in the prompt.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MultiSpeakerVoiceConfig {
    /// Configuration for each speaker
    pub speaker_voice_configs: Vec<SpeakerVoiceConfig>,
}
718
/// Configuration for a specific speaker in multi-speaker TTS
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeakerVoiceConfig {
    /// The name of the speaker (must match the name used in the prompt)
    pub speaker: String,
    /// Voice configuration for this speaker
    pub voice_config: VoiceConfig,
}
728
729impl SpeechConfig {
730    /// Create a new speech config with a single voice
731    pub fn single_voice(voice_name: impl Into<String>) -> Self {
732        Self {
733            voice_config: Some(VoiceConfig {
734                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
735                    voice_name: voice_name.into(),
736                }),
737            }),
738            multi_speaker_voice_config: None,
739        }
740    }
741
742    /// Create a new speech config with multiple speakers
743    pub fn multi_speaker(speakers: Vec<SpeakerVoiceConfig>) -> Self {
744        Self {
745            voice_config: None,
746            multi_speaker_voice_config: Some(MultiSpeakerVoiceConfig {
747                speaker_voice_configs: speakers,
748            }),
749        }
750    }
751}
752
753impl SpeakerVoiceConfig {
754    /// Create a new speaker voice configuration
755    pub fn new(speaker: impl Into<String>, voice_name: impl Into<String>) -> Self {
756        Self {
757            speaker: speaker.into(),
758            voice_config: VoiceConfig {
759                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
760                    voice_name: voice_name.into(),
761                }),
762            },
763        }
764    }
765}