//! Generation request/response models for the Gemini API
//! (crate path: gemini_rust/generation/model.rs).

1use serde::{Deserialize, Serialize};
2use time::OffsetDateTime;
3
4use crate::{
5    safety::{SafetyRating, SafetySetting},
6    Content, Modality, Part,
7};
8
9/// Reason why generation finished
10#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
11#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
12pub enum FinishReason {
13    /// Default value. This value is unused.
14    FinishReasonUnspecified,
15    /// Natural stop point of the model or provided stop sequence.
16    Stop,
17    /// The maximum number of tokens as specified in the request was reached.
18    MaxTokens,
19    /// The response candidate content was flagged for safety reasons.
20    Safety,
21    /// The response candidate content was flagged for recitation reasons.
22    Recitation,
23    /// The response candidate content was flagged for using an unsupported language.
24    Language,
25    /// Unknown reason.
26    Other,
27    /// Token generation stopped because the content contains forbidden terms.
28    Blocklist,
29    /// Token generation stopped for potentially containing prohibited content.
30    ProhibitedContent,
31    /// Token generation stopped because the content potentially contains Sensitive Personally Identifiable Information (SPII).
32    Spii,
33    /// The function call generated by the model is invalid.
34    MalformedFunctionCall,
35    /// Token generation stopped because generated images contain safety violations.
36    ImageSafety,
37    /// Model generated a tool call but no tools were enabled in the request.
38    UnexpectedToolCall,
39    /// Model called too many tools consecutively, thus the system exited execution.
40    TooManyToolCalls,
41}
42
/// Citation metadata for content.
///
/// Collects every citation the API attached to a piece of generated content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CitationMetadata {
    /// The citation sources attributed to the content
    pub citation_sources: Vec<CitationSource>,
}
50
/// A single citation source attributed to generated content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CitationSource {
    /// The URI of the citation source
    pub uri: Option<String>,
    /// The title of the citation source
    pub title: Option<String>,
    /// The start index of the citation in the response
    /// (presumably a character/byte offset into the candidate text — not
    /// specified here; confirm against the API reference)
    pub start_index: Option<i32>,
    /// The end index of the citation in the response
    pub end_index: Option<i32>,
    /// The license of the citation source
    pub license: Option<String>,
    /// The publication date of the citation source
    ///
    /// Carried as an RFC 3339 timestamp on the wire; `default` makes an
    /// absent field deserialize as `None` rather than erroring.
    #[serde(default, with = "time::serde::rfc3339::option")]
    pub publication_date: Option<OffsetDateTime>,
}
69
/// A candidate response produced by the model.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct Candidate {
    /// The content of the candidate
    ///
    /// `#[serde(default)]`: an absent `content` field deserializes as
    /// `Content::default()` instead of failing (can occur when generation
    /// is cut short — see `finish_reason`).
    #[serde(default)]
    pub content: Content,
    /// The safety ratings for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_ratings: Option<Vec<SafetyRating>>,
    /// The citation metadata for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub citation_metadata: Option<CitationMetadata>,
    /// The finish reason for the candidate
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<FinishReason>,
    /// The index of the candidate within the response
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index: Option<i32>,
}
90
/// Metadata about token usage for a generation request.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct UsageMetadata {
    /// The number of prompt tokens (`None` if request processing failed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_token_count: Option<i32>,
    /// The number of response tokens (`None` if generation failed)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub candidates_token_count: Option<i32>,
    /// The total number of tokens (`None` if individual counts unavailable)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_token_count: Option<i32>,
    /// The number of thinking tokens (Gemini 2.5 series only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<i32>,
    /// Per-modality breakdown of prompt tokens
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_tokens_details: Option<Vec<PromptTokenDetails>>,
    /// The number of cached content tokens (batch API)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content_token_count: Option<i32>,
    /// Per-modality breakdown of cached tokens (batch API)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_tokens_details: Option<Vec<PromptTokenDetails>>,
}
117
/// Token count for a single input modality.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptTokenDetails {
    /// The modality (e.g., "TEXT")
    pub modality: Modality,
    /// Token count for this modality
    pub token_count: i32,
}
127
/// Response from the Gemini API for content generation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GenerationResponse {
    /// The candidates generated
    ///
    /// Defaults to empty when the field is absent (e.g. when the prompt
    /// was blocked — see `prompt_feedback`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub candidates: Vec<Candidate>,
    /// The prompt feedback
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_feedback: Option<PromptFeedback>,
    /// Usage metadata
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage_metadata: Option<UsageMetadata>,
    /// Model version used to produce the response
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_version: Option<String>,
    /// Response ID
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_id: Option<String>,
}
148
149/// Reason why content was blocked
150#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
151#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
152pub enum BlockReason {
153    /// Default value. This value is unused.
154    BlockReasonUnspecified,
155    /// Prompt was blocked due to safety reasons. Inspect safetyRatings to understand which safety category blocked it.
156    Safety,
157    /// Prompt was blocked due to unknown reasons.
158    Other,
159    /// Prompt was blocked due to the terms which are included from the terminology blocklist.
160    Blocklist,
161    /// Prompt was blocked due to prohibited content.
162    ProhibitedContent,
163    /// Candidates blocked due to unsafe image generation content.
164    ImageSafety,
165}
166
/// Feedback about the prompt (safety evaluation and blocking).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptFeedback {
    /// The safety ratings for the prompt (empty when the field is absent)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub safety_ratings: Vec<SafetyRating>,
    /// The block reason if the prompt was blocked; `None` when not blocked
    #[serde(skip_serializing_if = "Option::is_none")]
    pub block_reason: Option<BlockReason>,
}
178
179impl GenerationResponse {
180    /// Get the text of the first candidate
181    pub fn text(&self) -> String {
182        self.candidates
183            .first()
184            .and_then(|c| {
185                c.content.parts.as_ref().and_then(|parts| {
186                    parts.first().and_then(|p| match p {
187                        Part::Text {
188                            text,
189                            thought: _,
190                            thought_signature: _,
191                        } => Some(text.clone()),
192                        _ => None,
193                    })
194                })
195            })
196            .unwrap_or_default()
197    }
198
199    /// Get function calls from the response
200    pub fn function_calls(&self) -> Vec<&crate::tools::FunctionCall> {
201        self.candidates
202            .iter()
203            .flat_map(|c| {
204                c.content
205                    .parts
206                    .as_ref()
207                    .map(|parts| {
208                        parts
209                            .iter()
210                            .filter_map(|p| match p {
211                                Part::FunctionCall {
212                                    function_call,
213                                    thought_signature: _,
214                                } => Some(function_call),
215                                _ => None,
216                            })
217                            .collect::<Vec<_>>()
218                    })
219                    .unwrap_or_default()
220            })
221            .collect()
222    }
223
224    /// Get function calls with their thought signatures from the response
225    pub fn function_calls_with_thoughts(
226        &self,
227    ) -> Vec<(&crate::tools::FunctionCall, Option<&String>)> {
228        self.candidates
229            .iter()
230            .flat_map(|c| {
231                c.content
232                    .parts
233                    .as_ref()
234                    .map(|parts| {
235                        parts
236                            .iter()
237                            .filter_map(|p| match p {
238                                Part::FunctionCall {
239                                    function_call,
240                                    thought_signature,
241                                } => Some((function_call, thought_signature.as_ref())),
242                                _ => None,
243                            })
244                            .collect::<Vec<_>>()
245                    })
246                    .unwrap_or_default()
247            })
248            .collect()
249    }
250
251    /// Get thought summaries from the response
252    pub fn thoughts(&self) -> Vec<String> {
253        self.candidates
254            .iter()
255            .flat_map(|c| {
256                c.content
257                    .parts
258                    .as_ref()
259                    .map(|parts| {
260                        parts
261                            .iter()
262                            .filter_map(|p| match p {
263                                Part::Text {
264                                    text,
265                                    thought: Some(true),
266                                    thought_signature: _,
267                                } => Some(text.clone()),
268                                _ => None,
269                            })
270                            .collect::<Vec<_>>()
271                    })
272                    .unwrap_or_default()
273            })
274            .collect()
275    }
276
277    /// Get all text parts (both regular text and thoughts)
278    pub fn all_text(&self) -> Vec<(String, bool)> {
279        self.candidates
280            .iter()
281            .flat_map(|c| {
282                c.content
283                    .parts
284                    .as_ref()
285                    .map(|parts| {
286                        parts
287                            .iter()
288                            .filter_map(|p| match p {
289                                Part::Text {
290                                    text,
291                                    thought,
292                                    thought_signature: _,
293                                } => Some((text.clone(), thought.unwrap_or(false))),
294                                _ => None,
295                            })
296                            .collect::<Vec<_>>()
297                    })
298                    .unwrap_or_default()
299            })
300            .collect()
301    }
302
303    /// Get text parts with their thought signatures from the response
304    pub fn text_with_thoughts(&self) -> Vec<(String, bool, Option<&String>)> {
305        self.candidates
306            .iter()
307            .flat_map(|c| {
308                c.content
309                    .parts
310                    .as_ref()
311                    .map(|parts| {
312                        parts
313                            .iter()
314                            .filter_map(|p| match p {
315                                Part::Text {
316                                    text,
317                                    thought,
318                                    thought_signature,
319                                } => Some((
320                                    text.clone(),
321                                    thought.unwrap_or(false),
322                                    thought_signature.as_ref(),
323                                )),
324                                _ => None,
325                            })
326                            .collect::<Vec<_>>()
327                    })
328                    .unwrap_or_default()
329            })
330            .collect()
331    }
332}
333
/// Request to generate content.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerateContentRequest {
    /// The conversation contents to generate from, in order
    pub contents: Vec<Content>,
    /// The generation config
    #[serde(skip_serializing_if = "Option::is_none")]
    pub generation_config: Option<GenerationConfig>,
    /// The safety settings
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_settings: Option<Vec<SafetySetting>>,
    /// The tools that the model can use
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<crate::tools::Tool>>,
    /// The tool config
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_config: Option<crate::tools::ToolConfig>,
    /// The system instruction
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_instruction: Option<Content>,
    /// Name of the cached content to use, if any
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content: Option<String>,
}
359
/// Configuration for thinking (Gemini 2.5 series only).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ThinkingConfig {
    /// The thinking budget (number of thinking tokens)
    ///
    /// - Set to 0 to disable thinking
    /// - Set to -1 for dynamic thinking (model decides)
    /// - Set to a positive number for a specific token budget
    ///
    /// Model-specific ranges (not validated client-side):
    /// - 2.5 Pro: 128 to 32768 (cannot disable thinking)
    /// - 2.5 Flash: 0 to 24576
    /// - 2.5 Flash Lite: 512 to 24576
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,

    /// Whether to include thought summaries in the response
    ///
    /// When enabled, the response will include synthesized versions of the model's
    /// raw thoughts, providing insights into the reasoning process.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,
}
384
385impl ThinkingConfig {
386    // TODO: Add failable constructor with validation
387    // pub fn new() -> Result<Self, ValidationError> { ... }
388    // Should validate temperature (0.0-1.0), max_tokens (>0), etc.
389
390    /// Create a new thinking config with default settings
391    pub fn new() -> Self {
392        Self {
393            thinking_budget: None,
394            include_thoughts: None,
395        }
396    }
397
398    /// Set the thinking budget
399    pub fn with_thinking_budget(mut self, budget: i32) -> Self {
400        self.thinking_budget = Some(budget);
401        self
402    }
403
404    /// Enable dynamic thinking (model decides the budget)
405    pub fn with_dynamic_thinking(mut self) -> Self {
406        self.thinking_budget = Some(-1);
407        self
408    }
409
410    /// Include thought summaries in the response
411    pub fn with_thoughts_included(mut self, include: bool) -> Self {
412        self.include_thoughts = Some(include);
413        self
414    }
415
416    /// Create a thinking config that enables dynamic thinking with thoughts included
417    pub fn dynamic_thinking() -> Self {
418        Self {
419            thinking_budget: Some(-1),
420            include_thoughts: Some(true),
421        }
422    }
423}
424
425impl Default for ThinkingConfig {
426    fn default() -> Self {
427        Self::new()
428    }
429}
430
/// Configuration for generation.
///
/// All fields are optional; `None` fields are omitted from the request so the
/// API applies its own defaults. No client-side validation is performed.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerationConfig {
    /// The temperature for the model (0.0 to 1.0)
    ///
    /// Controls the randomness of the output. Higher values (e.g., 0.9) make output
    /// more random, lower values (e.g., 0.1) make output more deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The top-p value for the model (0.0 to 1.0)
    ///
    /// For each token generation step, the model considers the top_p percentage of
    /// probability mass for potential token choices. Lower values are more selective,
    /// higher values allow more variety.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// The top-k value for the model
    ///
    /// For each token generation step, the model considers the top_k most likely tokens.
    /// Lower values are more selective, higher values allow more variety.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<i32>,

    /// The maximum number of tokens to generate
    ///
    /// Limits the length of the generated content. One token is roughly 4 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<i32>,

    /// The candidate count
    ///
    /// Number of alternative responses to generate.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub candidate_count: Option<i32>,

    /// Stop sequences
    ///
    /// The model will stop generating content when it encounters any of these sequences.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_sequences: Option<Vec<String>>,

    /// The response mime type
    ///
    /// Specifies the format of the model's response (e.g. "application/json").
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_mime_type: Option<String>,
    /// The response schema
    ///
    /// Specifies the JSON schema for structured responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_schema: Option<serde_json::Value>,

    /// Response modalities (for TTS and other multimodal outputs)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_modalities: Option<Vec<String>>,

    /// Speech configuration for text-to-speech generation
    #[serde(skip_serializing_if = "Option::is_none")]
    pub speech_config: Option<SpeechConfig>,

    /// The thinking configuration
    ///
    /// Configuration for the model's thinking process (Gemini 2.5 series only).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
}
500
/// Configuration for speech generation (text-to-speech).
///
/// Exactly one of the two fields is expected to be set — see
/// `SpeechConfig::single_voice` / `SpeechConfig::multi_speaker`; the struct
/// itself does not enforce mutual exclusion.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeechConfig {
    /// Single voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice_config: Option<VoiceConfig>,
    /// Multi-speaker voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multi_speaker_voice_config: Option<MultiSpeakerVoiceConfig>,
}
512
/// Voice configuration for text-to-speech.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct VoiceConfig {
    /// Prebuilt voice configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prebuilt_voice_config: Option<PrebuiltVoiceConfig>,
}
521
/// Prebuilt voice configuration.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PrebuiltVoiceConfig {
    /// The name of the prebuilt voice to use
    pub voice_name: String,
}
529
/// Multi-speaker voice configuration.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MultiSpeakerVoiceConfig {
    /// Voice configuration for each named speaker
    pub speaker_voice_configs: Vec<SpeakerVoiceConfig>,
}
537
/// Configuration for a specific speaker in multi-speaker TTS.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeakerVoiceConfig {
    /// The name of the speaker (must match the name used in the prompt)
    pub speaker: String,
    /// Voice configuration for this speaker
    pub voice_config: VoiceConfig,
}
547
548impl SpeechConfig {
549    /// Create a new speech config with a single voice
550    pub fn single_voice(voice_name: impl Into<String>) -> Self {
551        Self {
552            voice_config: Some(VoiceConfig {
553                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
554                    voice_name: voice_name.into(),
555                }),
556            }),
557            multi_speaker_voice_config: None,
558        }
559    }
560
561    /// Create a new speech config with multiple speakers
562    pub fn multi_speaker(speakers: Vec<SpeakerVoiceConfig>) -> Self {
563        Self {
564            voice_config: None,
565            multi_speaker_voice_config: Some(MultiSpeakerVoiceConfig {
566                speaker_voice_configs: speakers,
567            }),
568        }
569    }
570}
571
572impl SpeakerVoiceConfig {
573    /// Create a new speaker voice configuration
574    pub fn new(speaker: impl Into<String>, voice_name: impl Into<String>) -> Self {
575        Self {
576            speaker: speaker.into(),
577            voice_config: VoiceConfig {
578                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
579                    voice_name: voice_name.into(),
580                }),
581            },
582        }
583    }
584}