//! Request/response model types for Gemini content generation
//! (`gemini_rust/generation/model.rs`).

1use serde::{Deserialize, Serialize};
2use time::OffsetDateTime;
3
4use crate::{
5    safety::{SafetyRating, SafetySetting},
6    Content, Modality, Part,
7};
8
/// Reason why the model stopped generating tokens for a candidate.
///
/// Serialized in SCREAMING_SNAKE_CASE on the wire (e.g. `MAX_TOKENS`).
// NOTE(review): an unrecognized value sent by the API will fail deserialization;
// consider a `#[serde(other)]` catch-all on `Other` — confirm serde support for
// unit-variant enums before changing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum FinishReason {
    /// Default value. This value is unused.
    FinishReasonUnspecified,
    /// Natural stop point of the model or provided stop sequence.
    Stop,
    /// The maximum number of tokens as specified in the request was reached.
    MaxTokens,
    /// The response candidate content was flagged for safety reasons.
    Safety,
    /// The response candidate content was flagged for recitation reasons.
    Recitation,
    /// The response candidate content was flagged for using an unsupported language.
    Language,
    /// Unknown reason.
    Other,
    /// Token generation stopped because the content contains forbidden terms.
    Blocklist,
    /// Token generation stopped for potentially containing prohibited content.
    ProhibitedContent,
    /// Token generation stopped because the content potentially contains Sensitive Personally Identifiable Information (SPII).
    Spii,
    /// The function call generated by the model is invalid.
    MalformedFunctionCall,
    /// Token generation stopped because generated images contain safety violations.
    ImageSafety,
    /// Model generated a tool call but no tools were enabled in the request.
    UnexpectedToolCall,
    /// Model called too many tools consecutively, thus the system exited execution.
    TooManyToolCalls,
}
42
/// Citation metadata attached to generated content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CitationMetadata {
    /// The citation sources (serialized as `citationSources`).
    pub citation_sources: Vec<CitationSource>,
}
50
/// A single source cited by the model.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct CitationSource {
    /// The URI of the citation source.
    pub uri: Option<String>,
    /// The title of the citation source.
    pub title: Option<String>,
    /// The start index of the citation in the response.
    pub start_index: Option<i32>,
    /// The end index of the citation in the response.
    pub end_index: Option<i32>,
    /// The license of the citation source.
    pub license: Option<String>,
    /// The publication date of the citation source.
    ///
    /// Parsed as an optional RFC 3339 timestamp; a missing field becomes `None`
    /// via `#[serde(default)]`.
    // NOTE(review): confirm the API actually sends RFC 3339 here — if it sends a
    // `{year, month, day}` Date object instead, this field will fail to parse.
    #[serde(default, with = "time::serde::rfc3339::option")]
    pub publication_date: Option<OffsetDateTime>,
}
69
/// A single generated response candidate.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct Candidate {
    /// The content of the candidate.
    ///
    /// `#[serde(default)]` means a missing `content` field deserializes to
    /// `Content::default()` — presumably the blocked/empty-candidate case; confirm.
    #[serde(default)]
    pub content: Content,
    /// The safety ratings for the candidate.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_ratings: Option<Vec<SafetyRating>>,
    /// The citation metadata for the candidate.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub citation_metadata: Option<CitationMetadata>,
    /// The finish reason for the candidate.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<FinishReason>,
    /// The index of the candidate within the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub index: Option<i32>,
}
90
/// Metadata about token usage for a generation request.
///
/// All counts are optional: the API omits them when the corresponding
/// processing step failed or does not apply.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct UsageMetadata {
    /// The number of prompt tokens (null if request processing failed).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_token_count: Option<i32>,
    /// The number of response tokens (null if generation failed).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub candidates_token_count: Option<i32>,
    /// The total number of tokens (null if individual counts unavailable).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_token_count: Option<i32>,
    /// The number of thinking tokens (Gemini 2.5 series only).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<i32>,
    /// Detailed prompt token information, broken down by modality.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_tokens_details: Option<Vec<PromptTokenDetails>>,
    /// The number of cached content tokens (batch API).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content_token_count: Option<i32>,
    /// Detailed cache token information (batch API).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_tokens_details: Option<Vec<PromptTokenDetails>>,
}
117
/// Token count for a single modality within a prompt.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptTokenDetails {
    /// The modality (e.g., "TEXT").
    pub modality: Modality,
    /// Token count for this modality.
    pub token_count: i32,
}
127
/// Response from the Gemini API for content generation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct GenerationResponse {
    /// The candidates generated. May be empty if the prompt was blocked
    /// (see `prompt_feedback`).
    pub candidates: Vec<Candidate>,
    /// The prompt feedback (safety ratings and optional block reason).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_feedback: Option<PromptFeedback>,
    /// Usage metadata (token counts).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage_metadata: Option<UsageMetadata>,
    /// Model version used to produce this response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_version: Option<String>,
    /// Server-assigned response ID.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_id: Option<String>,
}
147
/// Reason why a prompt was blocked.
///
/// Serialized in SCREAMING_SNAKE_CASE on the wire.
// NOTE(review): same caveat as `FinishReason` — an unrecognized value from the
// API fails deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum BlockReason {
    /// Default value. This value is unused.
    BlockReasonUnspecified,
    /// Prompt was blocked due to safety reasons. Inspect safetyRatings to understand which safety category blocked it.
    Safety,
    /// Prompt was blocked due to unknown reasons.
    Other,
    /// Prompt was blocked due to the terms which are included from the terminology blocklist.
    Blocklist,
    /// Prompt was blocked due to prohibited content.
    ProhibitedContent,
    /// Candidates blocked due to unsafe image generation content.
    ImageSafety,
}
165
/// Feedback about the prompt.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PromptFeedback {
    /// The safety ratings for the prompt.
    // NOTE(review): not Option and no #[serde(default)] — if the API ever omits
    // `safetyRatings`, deserialization of the whole response fails; confirm the
    // field is always present.
    pub safety_ratings: Vec<SafetyRating>,
    /// The block reason if the prompt was blocked.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub block_reason: Option<BlockReason>,
}
176
177impl GenerationResponse {
178    /// Get the text of the first candidate
179    pub fn text(&self) -> String {
180        self.candidates
181            .first()
182            .and_then(|c| {
183                c.content.parts.as_ref().and_then(|parts| {
184                    parts.first().and_then(|p| match p {
185                        Part::Text {
186                            text,
187                            thought: _,
188                            thought_signature: _,
189                        } => Some(text.clone()),
190                        _ => None,
191                    })
192                })
193            })
194            .unwrap_or_default()
195    }
196
197    /// Get function calls from the response
198    pub fn function_calls(&self) -> Vec<&crate::tools::FunctionCall> {
199        self.candidates
200            .iter()
201            .flat_map(|c| {
202                c.content
203                    .parts
204                    .as_ref()
205                    .map(|parts| {
206                        parts
207                            .iter()
208                            .filter_map(|p| match p {
209                                Part::FunctionCall {
210                                    function_call,
211                                    thought_signature: _,
212                                } => Some(function_call),
213                                _ => None,
214                            })
215                            .collect::<Vec<_>>()
216                    })
217                    .unwrap_or_default()
218            })
219            .collect()
220    }
221
222    /// Get function calls with their thought signatures from the response
223    pub fn function_calls_with_thoughts(
224        &self,
225    ) -> Vec<(&crate::tools::FunctionCall, Option<&String>)> {
226        self.candidates
227            .iter()
228            .flat_map(|c| {
229                c.content
230                    .parts
231                    .as_ref()
232                    .map(|parts| {
233                        parts
234                            .iter()
235                            .filter_map(|p| match p {
236                                Part::FunctionCall {
237                                    function_call,
238                                    thought_signature,
239                                } => Some((function_call, thought_signature.as_ref())),
240                                _ => None,
241                            })
242                            .collect::<Vec<_>>()
243                    })
244                    .unwrap_or_default()
245            })
246            .collect()
247    }
248
249    /// Get thought summaries from the response
250    pub fn thoughts(&self) -> Vec<String> {
251        self.candidates
252            .iter()
253            .flat_map(|c| {
254                c.content
255                    .parts
256                    .as_ref()
257                    .map(|parts| {
258                        parts
259                            .iter()
260                            .filter_map(|p| match p {
261                                Part::Text {
262                                    text,
263                                    thought: Some(true),
264                                    thought_signature: _,
265                                } => Some(text.clone()),
266                                _ => None,
267                            })
268                            .collect::<Vec<_>>()
269                    })
270                    .unwrap_or_default()
271            })
272            .collect()
273    }
274
275    /// Get all text parts (both regular text and thoughts)
276    pub fn all_text(&self) -> Vec<(String, bool)> {
277        self.candidates
278            .iter()
279            .flat_map(|c| {
280                c.content
281                    .parts
282                    .as_ref()
283                    .map(|parts| {
284                        parts
285                            .iter()
286                            .filter_map(|p| match p {
287                                Part::Text {
288                                    text,
289                                    thought,
290                                    thought_signature: _,
291                                } => Some((text.clone(), thought.unwrap_or(false))),
292                                _ => None,
293                            })
294                            .collect::<Vec<_>>()
295                    })
296                    .unwrap_or_default()
297            })
298            .collect()
299    }
300
301    /// Get text parts with their thought signatures from the response
302    pub fn text_with_thoughts(&self) -> Vec<(String, bool, Option<&String>)> {
303        self.candidates
304            .iter()
305            .flat_map(|c| {
306                c.content
307                    .parts
308                    .as_ref()
309                    .map(|parts| {
310                        parts
311                            .iter()
312                            .filter_map(|p| match p {
313                                Part::Text {
314                                    text,
315                                    thought,
316                                    thought_signature,
317                                } => Some((
318                                    text.clone(),
319                                    thought.unwrap_or(false),
320                                    thought_signature.as_ref(),
321                                )),
322                                _ => None,
323                            })
324                            .collect::<Vec<_>>()
325                    })
326                    .unwrap_or_default()
327            })
328            .collect()
329    }
330}
331
/// Request to generate content.
///
/// Serialized as camelCase; optional fields are omitted from the JSON payload
/// entirely (`skip_serializing_if`) rather than sent as null.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerateContentRequest {
    /// The conversation contents to generate from.
    pub contents: Vec<Content>,
    /// The generation config.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub generation_config: Option<GenerationConfig>,
    /// The safety settings.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub safety_settings: Option<Vec<SafetySetting>>,
    /// The tools that the model can use.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<crate::tools::Tool>>,
    /// The tool config.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_config: Option<crate::tools::ToolConfig>,
    /// The system instruction.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_instruction: Option<Content>,
    /// The name of cached content to use, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content: Option<String>,
}
357
/// Configuration for thinking (Gemini 2.5 series only).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ThinkingConfig {
    /// The thinking budget (number of thinking tokens)
    ///
    /// - Set to 0 to disable thinking
    /// - Set to -1 for dynamic thinking (model decides)
    /// - Set to a positive number for a specific token budget
    ///
    /// Model-specific ranges:
    /// - 2.5 Pro: 128 to 32768 (cannot disable thinking)
    /// - 2.5 Flash: 0 to 24576
    /// - 2.5 Flash Lite: 512 to 24576
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,

    /// Whether to include thought summaries in the response
    ///
    /// When enabled, the response will include synthesized versions of the model's
    /// raw thoughts, providing insights into the reasoning process.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,
}
382
383impl ThinkingConfig {
384    // TODO: Add failable constructor with validation
385    // pub fn new() -> Result<Self, ValidationError> { ... }
386    // Should validate temperature (0.0-1.0), max_tokens (>0), etc.
387
388    /// Create a new thinking config with default settings
389    pub fn new() -> Self {
390        Self {
391            thinking_budget: None,
392            include_thoughts: None,
393        }
394    }
395
396    /// Set the thinking budget
397    pub fn with_thinking_budget(mut self, budget: i32) -> Self {
398        self.thinking_budget = Some(budget);
399        self
400    }
401
402    /// Enable dynamic thinking (model decides the budget)
403    pub fn with_dynamic_thinking(mut self) -> Self {
404        self.thinking_budget = Some(-1);
405        self
406    }
407
408    /// Include thought summaries in the response
409    pub fn with_thoughts_included(mut self, include: bool) -> Self {
410        self.include_thoughts = Some(include);
411        self
412    }
413}
414
415impl Default for ThinkingConfig {
416    fn default() -> Self {
417        Self::new()
418    }
419}
420
/// Configuration for generation.
///
/// All fields are optional; unset fields are omitted from the request so the
/// API applies its own defaults.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GenerationConfig {
    /// The temperature for the model (0.0 to 1.0)
    ///
    /// Controls the randomness of the output. Higher values (e.g., 0.9) make output
    /// more random, lower values (e.g., 0.1) make output more deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The top-p value for the model (0.0 to 1.0)
    ///
    /// For each token generation step, the model considers the top_p percentage of
    /// probability mass for potential token choices. Lower values are more selective,
    /// higher values allow more variety.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// The top-k value for the model
    ///
    /// For each token generation step, the model considers the top_k most likely tokens.
    /// Lower values are more selective, higher values allow more variety.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<i32>,

    /// The maximum number of tokens to generate
    ///
    /// Limits the length of the generated content. One token is roughly 4 characters.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<i32>,

    /// The candidate count
    ///
    /// Number of alternative responses to generate.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub candidate_count: Option<i32>,

    /// Stop sequences
    ///
    /// The model will stop generating content when it encounters any of these sequences.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_sequences: Option<Vec<String>>,

    /// The response mime type
    ///
    /// Specifies the format of the model's response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_mime_type: Option<String>,

    /// The response schema
    ///
    /// Specifies the JSON schema for structured responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_schema: Option<serde_json::Value>,

    /// Response modalities (for TTS and other multimodal outputs)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_modalities: Option<Vec<String>>,

    /// Speech configuration for text-to-speech generation
    #[serde(skip_serializing_if = "Option::is_none")]
    pub speech_config: Option<SpeechConfig>,

    /// The thinking configuration
    ///
    /// Configuration for the model's thinking process (Gemini 2.5 series only).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
}
490
/// Configuration for speech generation (text-to-speech).
///
/// Presumably exactly one of `voice_config` / `multi_speaker_voice_config`
/// should be set (see the `single_voice` / `multi_speaker` constructors);
/// the type does not enforce this — confirm against the API.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeechConfig {
    /// Single voice configuration.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice_config: Option<VoiceConfig>,
    /// Multi-speaker voice configuration.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multi_speaker_voice_config: Option<MultiSpeakerVoiceConfig>,
}
502
/// Voice configuration for text-to-speech.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct VoiceConfig {
    /// Prebuilt voice configuration.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prebuilt_voice_config: Option<PrebuiltVoiceConfig>,
}
511
/// Prebuilt voice configuration.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PrebuiltVoiceConfig {
    /// The name of the prebuilt voice to use.
    pub voice_name: String,
}
519
/// Multi-speaker voice configuration.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MultiSpeakerVoiceConfig {
    /// Per-speaker voice configurations.
    pub speaker_voice_configs: Vec<SpeakerVoiceConfig>,
}
527
/// Configuration for a specific speaker in multi-speaker TTS.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SpeakerVoiceConfig {
    /// The name of the speaker (must match the name used in the prompt).
    pub speaker: String,
    /// Voice configuration for this speaker.
    pub voice_config: VoiceConfig,
}
537
538impl SpeechConfig {
539    /// Create a new speech config with a single voice
540    pub fn single_voice(voice_name: impl Into<String>) -> Self {
541        Self {
542            voice_config: Some(VoiceConfig {
543                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
544                    voice_name: voice_name.into(),
545                }),
546            }),
547            multi_speaker_voice_config: None,
548        }
549    }
550
551    /// Create a new speech config with multiple speakers
552    pub fn multi_speaker(speakers: Vec<SpeakerVoiceConfig>) -> Self {
553        Self {
554            voice_config: None,
555            multi_speaker_voice_config: Some(MultiSpeakerVoiceConfig {
556                speaker_voice_configs: speakers,
557            }),
558        }
559    }
560}
561
562impl SpeakerVoiceConfig {
563    /// Create a new speaker voice configuration
564    pub fn new(speaker: impl Into<String>, voice_name: impl Into<String>) -> Self {
565        Self {
566            speaker: speaker.into(),
567            voice_config: VoiceConfig {
568                prebuilt_voice_config: Some(PrebuiltVoiceConfig {
569                    voice_name: voice_name.into(),
570                }),
571            },
572        }
573    }
574}