Skip to main content

adk_gemini/
models.rs

1//! # Core Gemini API Primitives
2//!
3//! This module contains the fundamental building blocks used across the Gemini API.
4//! These core data structures are shared by multiple modules and form the foundation
5//! for constructing requests and parsing responses.
6//!
7//! ## Core Types
8//!
9//! - [`Role`] - Represents the speaker in a conversation (User or Model)
10//! - [`Part`] - Content fragments that make up messages (text, images, function calls)
11//! - [`Blob`] - Binary data with MIME type for inline content
12//! - [`Content`] - Container for parts with optional role assignment
13//! - [`Message`] - Complete message with content and explicit role
//! - [`Modality`] - Content modality types (text, image, audio, video, document)
15//!
16//! ## Usage
17//!
18//! These types are typically used in combination with the domain-specific modules:
19//! - `generation` - For content generation requests and responses
20//! - `embedding` - For text embedding operations
21//! - `safety` - For content moderation settings
22//! - `tools` - For function calling capabilities
23//! - `batch` - For batch processing operations
24//! - `cache` - For content caching
25//! - `files` - For file management
26
27#![allow(clippy::enum_variant_names)]
28
29use serde::{Deserialize, Serialize, de};
30
31/// Role of a message in a conversation
32#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
33#[serde(rename_all = "lowercase")]
34pub enum Role {
35    /// Message from the user
36    User,
37    /// Message from the model
38    Model,
39}
40
41/// Content part that can be included in a message
42#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
43#[serde(untagged)]
44pub enum Part {
45    /// Text content
46    Text {
47        /// The text content
48        text: String,
49        /// Whether this is a thought summary (Gemini 2.5 series only)
50        #[serde(skip_serializing_if = "Option::is_none")]
51        thought: Option<bool>,
52        /// The thought signature (Gemini 2.5+ thinking models only).
53        /// Preserved from responses and echoed back in conversation history for Gemini 3.x thought signature support.
54        #[serde(rename = "thoughtSignature", default, skip_serializing_if = "Option::is_none")]
55        thought_signature: Option<String>,
56    },
57    /// Inline binary data (base64-encoded with MIME type).
58    InlineData {
59        /// The blob data
60        #[serde(rename = "inlineData")]
61        inline_data: Blob,
62    },
63    /// File data referenced by URI
64    FileData {
65        /// The file data reference.
66        #[serde(rename = "fileData")]
67        file_data: FileDataRef,
68    },
69    /// Function call from the model
70    FunctionCall {
71        /// The function call details
72        #[serde(rename = "functionCall")]
73        function_call: super::tools::FunctionCall,
74        /// The thought signature (Gemini 2.5+ thinking models only).
75        /// Preserved from responses and echoed back in conversation history for Gemini 3.x thought signature support.
76        #[serde(rename = "thoughtSignature", default, skip_serializing_if = "Option::is_none")]
77        thought_signature: Option<String>,
78    },
79    /// Function response (results from executing a function call)
80    FunctionResponse {
81        /// The function response details
82        #[serde(rename = "functionResponse")]
83        function_response: super::tools::FunctionResponse,
84        /// The thought signature (Gemini 3.x thinking models).
85        /// Must be echoed back on function response parts when thinking is active.
86        #[serde(rename = "thoughtSignature", default, skip_serializing_if = "Option::is_none")]
87        thought_signature: Option<String>,
88    },
89    /// Server-side tool call from Gemini 3 (built-in tool invocation)
90    ToolCall {
91        /// The tool call payload.
92        #[serde(rename = "toolCall")]
93        tool_call: serde_json::Value,
94        /// The thought signature (Gemini 3.x thinking models).
95        /// Must be preserved and echoed back in conversation history.
96        #[serde(rename = "thoughtSignature", default, skip_serializing_if = "Option::is_none")]
97        thought_signature: Option<String>,
98    },
99    /// Server-side tool response from Gemini 3 (built-in tool result)
100    ToolResponse {
101        /// The tool response payload.
102        #[serde(rename = "toolResponse")]
103        tool_response: serde_json::Value,
104        /// The thought signature (Gemini 3.x thinking models).
105        /// Must be preserved and echoed back in conversation history.
106        #[serde(rename = "thoughtSignature", default, skip_serializing_if = "Option::is_none")]
107        thought_signature: Option<String>,
108    },
109    /// Generated code emitted by Gemini code execution.
110    ExecutableCode {
111        /// The executable code payload.
112        #[serde(rename = "executableCode")]
113        executable_code: serde_json::Value,
114        /// The thought signature (Gemini 3.x thinking models).
115        /// Must be preserved and echoed back in conversation history.
116        #[serde(rename = "thoughtSignature", default, skip_serializing_if = "Option::is_none")]
117        thought_signature: Option<String>,
118    },
119    /// Result emitted by Gemini code execution.
120    CodeExecutionResult {
121        /// The code execution result payload.
122        #[serde(rename = "codeExecutionResult")]
123        code_execution_result: serde_json::Value,
124        /// The thought signature (Gemini 3.x thinking models).
125        /// Must be preserved and echoed back in conversation history.
126        #[serde(rename = "thoughtSignature", default, skip_serializing_if = "Option::is_none")]
127        thought_signature: Option<String>,
128    },
129}
130
131/// Blob for a message part
132#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
133#[serde(rename_all = "camelCase")]
134pub struct Blob {
135    /// The MIME type of the data
136    pub mime_type: String,
137    /// Base64 encoded data
138    pub data: String,
139}
140
141impl Blob {
142    /// Create a new blob with mime type and data
143    pub fn new(mime_type: impl Into<String>, data: impl Into<String>) -> Self {
144        Self { mime_type: mime_type.into(), data: data.into() }
145    }
146}
147
148/// Reference to an external file by URI, used in Gemini wire format.
149///
150/// # Example
151///
152/// ```rust
153/// use adk_gemini::FileDataRef;
154///
155/// let file_ref = FileDataRef {
156///     mime_type: "application/pdf".to_string(),
157///     file_uri: "gs://my-bucket/report.pdf".to_string(),
158/// };
159/// ```
160#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
161#[serde(rename_all = "camelCase")]
162pub struct FileDataRef {
163    /// The MIME type of the referenced file.
164    pub mime_type: String,
165    /// The URI of the file (e.g. `gs://bucket/path` or a Gemini file URI).
166    pub file_uri: String,
167}
168
169/// Content of a message
170#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)]
171#[serde(rename_all = "camelCase")]
172pub struct Content {
173    /// Parts of the content
174    #[serde(skip_serializing_if = "Option::is_none")]
175    pub parts: Option<Vec<Part>>,
176    /// Role of the content
177    #[serde(skip_serializing_if = "Option::is_none")]
178    pub role: Option<Role>,
179}
180
181impl Content {
182    /// Create a new text content
183    pub fn text(text: impl Into<String>) -> Self {
184        Self {
185            parts: Some(vec![Part::Text {
186                text: text.into(),
187                thought: None,
188                thought_signature: None,
189            }]),
190            role: None,
191        }
192    }
193
194    /// Create a new content with a function call
195    pub fn function_call(function_call: super::tools::FunctionCall) -> Self {
196        Self {
197            parts: Some(vec![Part::FunctionCall { function_call, thought_signature: None }]),
198            role: None,
199        }
200    }
201
202    /// Create a new content with a function call and thought signature
203    pub fn function_call_with_thought(
204        function_call: super::tools::FunctionCall,
205        thought_signature: impl Into<String>,
206    ) -> Self {
207        Self {
208            parts: Some(vec![Part::FunctionCall {
209                function_call,
210                thought_signature: Some(thought_signature.into()),
211            }]),
212            role: None,
213        }
214    }
215
216    /// Create a new text content with thought signature
217    pub fn text_with_thought_signature(
218        text: impl Into<String>,
219        thought_signature: impl Into<String>,
220    ) -> Self {
221        Self {
222            parts: Some(vec![Part::Text {
223                text: text.into(),
224                thought: None,
225                thought_signature: Some(thought_signature.into()),
226            }]),
227            role: None,
228        }
229    }
230
231    /// Create a new thought content with thought signature
232    pub fn thought_with_signature(
233        text: impl Into<String>,
234        thought_signature: impl Into<String>,
235    ) -> Self {
236        Self {
237            parts: Some(vec![Part::Text {
238                text: text.into(),
239                thought: Some(true),
240                thought_signature: Some(thought_signature.into()),
241            }]),
242            role: None,
243        }
244    }
245
246    /// Create a new content with a function response
247    pub fn function_response(function_response: super::tools::FunctionResponse) -> Self {
248        Self {
249            parts: Some(vec![Part::FunctionResponse {
250                function_response,
251                thought_signature: None,
252            }]),
253            role: None,
254        }
255    }
256
257    /// Create a new content with a function response from name and JSON value
258    pub fn function_response_json(name: impl Into<String>, response: serde_json::Value) -> Self {
259        Self {
260            parts: Some(vec![Part::FunctionResponse {
261                function_response: super::tools::FunctionResponse::new(name, response),
262                thought_signature: None,
263            }]),
264            role: None,
265        }
266    }
267
268    /// Create a new content with inline data (blob data)
269    pub fn inline_data(mime_type: impl Into<String>, data: impl Into<String>) -> Self {
270        Self {
271            parts: Some(vec![Part::InlineData { inline_data: Blob::new(mime_type, data) }]),
272            role: None,
273        }
274    }
275
276    /// Create function response content with multimodal parts.
277    ///
278    /// The `FunctionResponse` carries its multimodal data (inline images, file references)
279    /// in its own `parts` field, matching the Gemini wire format where `inlineData`/`fileData`
280    /// entries are nested inside the `functionResponse` object.
281    pub fn function_response_multimodal(function_response: super::tools::FunctionResponse) -> Self {
282        Self {
283            parts: Some(vec![Part::FunctionResponse {
284                function_response,
285                thought_signature: None,
286            }]),
287            role: None,
288        }
289    }
290
291    /// Add a role to this content
292    pub fn with_role(mut self, role: Role) -> Self {
293        self.role = Some(role);
294        self
295    }
296}
297
298/// Message in a conversation
299#[derive(Debug, Clone, Serialize, Deserialize)]
300pub struct Message {
301    /// Content of the message
302    pub content: Content,
303    /// Role of the message
304    pub role: Role,
305}
306
307impl Message {
308    /// Create a new user message with text content
309    pub fn user(text: impl Into<String>) -> Self {
310        Self { content: Content::text(text).with_role(Role::User), role: Role::User }
311    }
312
313    /// Create a new model message with text content
314    pub fn model(text: impl Into<String>) -> Self {
315        Self { content: Content::text(text).with_role(Role::Model), role: Role::Model }
316    }
317
318    /// Create a new embedding message with text content
319    pub fn embed(text: impl Into<String>) -> Self {
320        Self { content: Content::text(text), role: Role::Model }
321    }
322
323    /// Create a new function message with function response content from JSON
324    pub fn function(name: impl Into<String>, response: serde_json::Value) -> Self {
325        Self {
326            content: Content::function_response_json(name, response).with_role(Role::Model),
327            role: Role::Model,
328        }
329    }
330
331    /// Create a new function message with function response from a JSON string
332    pub fn function_str(
333        name: impl Into<String>,
334        response: impl Into<String>,
335    ) -> Result<Self, serde_json::Error> {
336        let response_str = response.into();
337        let json = serde_json::from_str(&response_str)?;
338        Ok(Self {
339            content: Content::function_response_json(name, json).with_role(Role::Model),
340            role: Role::Model,
341        })
342    }
343}
344
345/// Content modality type - specifies the format of model output
346#[derive(Debug, Clone, Serialize, PartialEq)]
347#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
348pub enum Modality {
349    /// Default value.
350    ModalityUnspecified,
351    /// Indicates the model should return text.
352    Text,
353    /// Indicates the model should return images.
354    Image,
355    /// Indicates the model should return audio.
356    Audio,
357    /// Indicates the model should return video.
358    Video,
359    /// Indicates document content (PDFs, etc.)
360    Document,
361    /// Unknown or future modality types
362    Unknown,
363}
364
365impl Modality {
366    fn from_wire_str(value: &str) -> Self {
367        match value {
368            "MODALITY_UNSPECIFIED" => Self::ModalityUnspecified,
369            "TEXT" => Self::Text,
370            "IMAGE" => Self::Image,
371            "AUDIO" => Self::Audio,
372            "VIDEO" => Self::Video,
373            "DOCUMENT" => Self::Document,
374            _ => Self::Unknown,
375        }
376    }
377
378    fn from_wire_number(value: i64) -> Self {
379        match value {
380            0 => Self::ModalityUnspecified,
381            1 => Self::Text,
382            2 => Self::Image,
383            3 => Self::Video,
384            4 => Self::Audio,
385            5 => Self::Document,
386            _ => Self::Unknown,
387        }
388    }
389}
390
391impl<'de> Deserialize<'de> for Modality {
392    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
393    where
394        D: serde::Deserializer<'de>,
395    {
396        let value = serde_json::Value::deserialize(deserializer)?;
397        match value {
398            serde_json::Value::String(s) => Ok(Self::from_wire_str(&s)),
399            serde_json::Value::Number(n) => n
400                .as_i64()
401                .map(Self::from_wire_number)
402                .ok_or_else(|| de::Error::custom("modality must be an integer-compatible number")),
403            _ => Err(de::Error::custom("modality must be a string or integer")),
404        }
405    }
406}
407
/// Serialization and constructor tests for the core primitives and for the
/// multimodal `FunctionResponse` type from the sibling `tools` module.
#[cfg(test)]
mod tests {
    use super::super::tools::{FunctionResponse, FunctionResponsePart};
    use super::*;

    /// A `toolCall` object deserializes into `Part::ToolCall` and survives a round trip.
    #[test]
    fn test_tool_call_deserialize_and_roundtrip() {
        let json = r#"{"toolCall": {"name": "google_search", "args": {"query": "rust lang"}}}"#;
        let part: Part = serde_json::from_str(json).expect("should deserialize toolCall");
        match &part {
            Part::ToolCall { tool_call, .. } => {
                assert_eq!(tool_call["name"], "google_search");
                assert_eq!(tool_call["args"]["query"], "rust lang");
            }
            other => panic!("expected Part::ToolCall, got {other:?}"),
        }
        // Round-trip
        let serialized = serde_json::to_string(&part).expect("should serialize");
        let deserialized: Part =
            serde_json::from_str(&serialized).expect("should deserialize again");
        assert_eq!(part, deserialized);
    }

    /// A `toolResponse` object keeps its payload and thought signature through a round trip.
    #[test]
    fn test_tool_response_deserialize_and_roundtrip() {
        let json = r#"{"toolResponse": {"name": "google_search", "output": {"results": []}}, "thoughtSignature": "sig_123"}"#;
        let part: Part = serde_json::from_str(json).expect("should deserialize toolResponse");
        match &part {
            Part::ToolResponse { tool_response, thought_signature } => {
                assert_eq!(tool_response["name"], "google_search");
                assert_eq!(tool_response["output"]["results"], serde_json::json!([]));
                assert_eq!(thought_signature.as_deref(), Some("sig_123"));
            }
            other => panic!("expected Part::ToolResponse, got {other:?}"),
        }
        // Round-trip
        let serialized = serde_json::to_string(&part).expect("should serialize");
        let deserialized: Part =
            serde_json::from_str(&serialized).expect("should deserialize again");
        assert_eq!(part, deserialized);
    }

    /// Code execution parts are recognised and keep their thought signatures.
    #[test]
    fn test_code_execution_parts_preserve_thought_signature() {
        let executable = serde_json::json!({
            "executableCode": { "language": "python", "code": "print(1)" },
            "thoughtSignature": "sig_exec"
        });
        let result = serde_json::json!({
            "codeExecutionResult": { "outcome": "OUTCOME_OK", "output": "1" },
            "thoughtSignature": "sig_result"
        });

        let executable_part: Part =
            serde_json::from_value(executable).expect("should deserialize executable code");
        let result_part: Part =
            serde_json::from_value(result).expect("should deserialize code execution result");

        match executable_part {
            Part::ExecutableCode { thought_signature, .. } => {
                assert_eq!(thought_signature.as_deref(), Some("sig_exec"));
            }
            other => panic!("expected Part::ExecutableCode, got {other:?}"),
        }

        match result_part {
            Part::CodeExecutionResult { thought_signature, .. } => {
                assert_eq!(thought_signature.as_deref(), Some("sig_result"));
            }
            other => panic!("expected Part::CodeExecutionResult, got {other:?}"),
        }
    }

    // ===== Multimodal function response tests =====

    /// `FileDataRef` serializes with camelCase keys and round-trips unchanged.
    #[test]
    fn test_file_data_ref_serde_round_trip() {
        let file_ref = FileDataRef {
            mime_type: "application/pdf".to_string(),
            file_uri: "gs://bucket/report.pdf".to_string(),
        };
        let json = serde_json::to_string(&file_ref).unwrap();
        assert!(json.contains("mimeType"));
        assert!(json.contains("fileUri"));
        let deserialized: FileDataRef = serde_json::from_str(&json).unwrap();
        assert_eq!(file_ref, deserialized);
    }

    /// `Part::FileData` serializes under the `fileData` key and round-trips unchanged.
    #[test]
    fn test_part_file_data_serde_round_trip() {
        let part = Part::FileData {
            file_data: FileDataRef {
                mime_type: "image/jpeg".to_string(),
                file_uri: "https://example.com/img.jpg".to_string(),
            },
        };
        let json = serde_json::to_string(&part).unwrap();
        assert!(json.contains("fileData"));
        let deserialized: Part = serde_json::from_str(&json).unwrap();
        assert_eq!(part, deserialized);
    }

    /// A JSON-only `FunctionResponse` serializes to just `name` and `response`.
    #[test]
    fn test_function_response_new_backward_compat() {
        let fr = FunctionResponse::new("tool", serde_json::json!({"ok": true}));
        let json = serde_json::to_string(&fr).unwrap();
        let map: serde_json::Map<String, serde_json::Value> = serde_json::from_str(&json).unwrap();
        assert!(map.contains_key("name"));
        assert!(map.contains_key("response"));
        // Multimodal data lives in `parts` (with camelCase entries nested inside it),
        // so these are the keys that must be absent for a JSON-only response.
        assert!(!map.contains_key("parts"));
        assert!(!map.contains_key("inlineData"));
        assert!(!map.contains_key("fileData"));
        // Legacy snake_case spellings must not appear either.
        assert!(!map.contains_key("inline_data"));
        assert!(!map.contains_key("file_data"));
    }

    /// `with_inline_data` stores each blob as a nested inline-data part.
    #[test]
    fn test_function_response_with_inline_data_constructor() {
        let blobs = vec![Blob::new("image/png", "base64data")];
        let fr = FunctionResponse::with_inline_data(
            "chart",
            serde_json::json!({"status": "ok"}),
            blobs.clone(),
        );
        assert_eq!(fr.name, "chart");
        assert_eq!(fr.parts.len(), 1);
        assert!(matches!(
            &fr.parts[0],
            FunctionResponsePart::InlineData { inline_data }
            if inline_data == &blobs[0]
        ));
    }

    /// `with_file_data` stores each file reference as a nested file-data part.
    #[test]
    fn test_function_response_with_file_data_constructor() {
        let files = vec![FileDataRef {
            mime_type: "application/pdf".to_string(),
            file_uri: "gs://b/f.pdf".to_string(),
        }];
        let fr = FunctionResponse::with_file_data(
            "doc",
            serde_json::json!({"ok": true}),
            files.clone(),
        );
        assert_eq!(fr.name, "doc");
        assert_eq!(fr.parts.len(), 1);
        assert!(matches!(
            &fr.parts[0],
            FunctionResponsePart::FileData { file_data }
            if file_data == &files[0]
        ));
    }

    /// `inline_data_only` produces a response with parts but no JSON payload.
    #[test]
    fn test_function_response_inline_data_only_constructor() {
        let blobs = vec![Blob::new("audio/wav", "audiodata")];
        let fr = FunctionResponse::inline_data_only("audio_tool", blobs.clone());
        assert_eq!(fr.name, "audio_tool");
        assert!(fr.response.is_none());
        assert_eq!(fr.parts.len(), 1);
    }

    /// Multimodal data stays nested inside the single `FunctionResponse` content part.
    #[test]
    fn test_content_function_response_multimodal_parts_nested() {
        let blobs = [Blob::new("image/png", "img1"), Blob::new("image/jpeg", "img2")];
        let files = [FileDataRef {
            mime_type: "application/pdf".to_string(),
            file_uri: "gs://b/f.pdf".to_string(),
        }];
        let mut fr_parts: Vec<FunctionResponsePart> = blobs
            .iter()
            .map(|b| FunctionResponsePart::InlineData { inline_data: b.clone() })
            .collect();
        fr_parts
            .extend(files.iter().map(|f| FunctionResponsePart::FileData { file_data: f.clone() }));
        let fr = FunctionResponse {
            name: "tool".to_string(),
            response: Some(serde_json::json!({"ok": true})),
            parts: fr_parts,
        };
        let content = Content::function_response_multimodal(fr);
        let content_parts = content.parts.unwrap();
        // Single FunctionResponse part in the Content
        assert_eq!(content_parts.len(), 1);
        assert!(matches!(&content_parts[0], Part::FunctionResponse { .. }));
        // The multimodal data is nested inside the FunctionResponse
        if let Part::FunctionResponse { function_response, .. } = &content_parts[0] {
            // 2 inline + 1 file = 3 nested parts
            assert_eq!(function_response.parts.len(), 3);
        } else {
            panic!("expected FunctionResponse part");
        }
    }

    /// The serialized JSON matches the Gemini API wire format: the `parts` array
    /// with `inlineData` lives INSIDE the `functionResponse` object.
    #[test]
    fn test_multimodal_function_response_wire_format() {
        let fr = FunctionResponse {
            name: "get_image".to_string(),
            response: Some(serde_json::json!({"image_ref": {"$ref": "photo.jpg"}})),
            parts: vec![FunctionResponsePart::InlineData {
                inline_data: Blob::new("image/jpeg", "base64encodeddata"),
            }],
        };

        let part = Part::FunctionResponse { function_response: fr, thought_signature: None };
        let json = serde_json::to_value(&part).unwrap();

        // The functionResponse object should contain name, response, AND parts
        let fr_obj = &json["functionResponse"];
        assert_eq!(fr_obj["name"], "get_image");
        assert!(fr_obj["response"].is_object());
        assert!(fr_obj["parts"].is_array());
        assert_eq!(fr_obj["parts"].as_array().unwrap().len(), 1);

        // The nested part should have inlineData with mimeType and data
        let inline = &fr_obj["parts"][0]["inlineData"];
        assert_eq!(inline["mimeType"], "image/jpeg");
        assert_eq!(inline["data"], "base64encodeddata");
    }

    /// When there are no multimodal parts, the `parts` key is absent from the JSON.
    #[test]
    fn test_json_only_function_response_has_no_parts_key() {
        let fr = FunctionResponse::new("simple_tool", serde_json::json!({"result": "ok"}));
        let part = Part::FunctionResponse { function_response: fr, thought_signature: None };
        let json = serde_json::to_string(&part).unwrap();
        // Should NOT contain "parts" key at all
        assert!(
            !json.contains(r#""parts""#),
            "JSON-only response should not have parts key: {json}"
        );
    }
}