Skip to main content

openai_protocol/
interactions.rs

1// Gemini Interactions API types
2// https://ai.google.dev/gemini-api/docs/interactions
3
4use std::collections::HashMap;
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use serde_with::skip_serializing_none;
9use validator::{Validate, ValidationError};
10
11use super::{
12    common::{default_model, default_true, Function, GenerationRequest},
13    sampling_params::validate_top_p_value,
14};
15
16// ============================================================================
17// Request Type
18// ============================================================================
19
/// Request body for `POST /interactions`.
///
/// Exactly one of `model` or `agent` must be set (enforced by the
/// `validate_interactions_request` schema validator at the bottom of this
/// file). `#[skip_serializing_none]` omits every `None` field from the
/// serialized JSON.
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize, Validate)]
#[validate(schema(function = "validate_interactions_request"))]
pub struct InteractionsRequest {
    /// Model identifier (e.g., "gemini-2.5-flash")
    /// Required if agent is not provided
    pub model: Option<String>,

    /// Agent name (e.g., "deep-research-pro-preview-12-2025")
    /// Required if model is not provided
    pub agent: Option<String>,

    /// Input content - can be string or array of Content objects
    /// (see `InteractionsInput` for the accepted shapes)
    #[validate(custom(function = "validate_input"))]
    pub input: InteractionsInput,

    /// System instruction for the model
    pub system_instruction: Option<String>,

    /// Available tools
    #[validate(custom(function = "validate_tools"))]
    pub tools: Option<Vec<InteractionsTool>>,

    /// Response format for structured outputs
    pub response_format: Option<Value>,

    /// MIME type for the response (required if response_format is set)
    pub response_mime_type: Option<String>,

    /// Whether to stream the response (defaults to false via `#[serde(default)]`)
    #[serde(default)]
    pub stream: bool,

    /// Whether to store the interaction (default: true)
    #[serde(default = "default_true")]
    pub store: bool,

    /// Run request in background (agents only)
    #[serde(default)]
    pub background: bool,

    /// Generation configuration; `#[validate(nested)]` recurses into its
    /// own field validators (temperature range, top_p, etc.)
    #[validate(nested)]
    pub generation_config: Option<GenerationConfig>,

    /// Agent configuration (only applicable when agent is specified)
    pub agent_config: Option<AgentConfig>,

    /// Response modalities (text, image, audio)
    pub response_modalities: Option<Vec<ResponseModality>>,

    /// Link to prior interaction for stateful conversations
    pub previous_interaction_id: Option<String>,
}
74
75impl Default for InteractionsRequest {
76    fn default() -> Self {
77        Self {
78            model: Some(default_model()),
79            agent: None,
80            agent_config: None,
81            input: InteractionsInput::Text(String::new()),
82            system_instruction: None,
83            previous_interaction_id: None,
84            tools: None,
85            generation_config: None,
86            response_format: None,
87            response_mime_type: None,
88            response_modalities: None,
89            stream: false,
90            background: false,
91            store: true,
92        }
93    }
94}
95
96impl GenerationRequest for InteractionsRequest {
97    fn is_stream(&self) -> bool {
98        self.stream
99    }
100
101    fn get_model(&self) -> Option<&str> {
102        self.model.as_deref()
103    }
104
105    fn extract_text_for_routing(&self) -> String {
106        fn extract_from_content(content: &Content) -> Option<String> {
107            match content {
108                Content::Text { text, .. } => text.clone(),
109                _ => None,
110            }
111        }
112
113        fn extract_from_turn(turn: &Turn) -> String {
114            match &turn.content {
115                Some(TurnContent::Text(text)) => text.clone(),
116                Some(TurnContent::Contents(contents)) => contents
117                    .iter()
118                    .filter_map(extract_from_content)
119                    .collect::<Vec<String>>()
120                    .join(" "),
121                None => String::new(),
122            }
123        }
124
125        match &self.input {
126            InteractionsInput::Text(text) => text.clone(),
127            InteractionsInput::Content(content) => {
128                extract_from_content(content).unwrap_or_default()
129            }
130            InteractionsInput::Contents(contents) => contents
131                .iter()
132                .filter_map(extract_from_content)
133                .collect::<Vec<String>>()
134                .join(" "),
135            InteractionsInput::Turns(turns) => turns
136                .iter()
137                .map(extract_from_turn)
138                .collect::<Vec<String>>()
139                .join(" "),
140        }
141    }
142}
143
144// ============================================================================
145// Response Type
146// ============================================================================
147
/// An interaction resource, returned by create/get/cancel endpoints and
/// embedded in streaming events.
#[skip_serializing_none]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct Interaction {
    /// Object type, always "interaction"
    pub object: Option<String>,

    /// Model used
    pub model: Option<String>,

    /// Agent used
    pub agent: Option<String>,

    /// Interaction ID (the only non-optional field)
    pub id: String,

    /// Interaction status; defaults to `InProgress` via `Default`
    pub status: InteractionsStatus,

    /// Creation timestamp (ISO 8601)
    pub created: Option<String>,

    /// Last update timestamp (ISO 8601)
    pub updated: Option<String>,

    /// Role of the interaction
    pub role: Option<String>,

    /// Output content
    pub outputs: Option<Vec<Content>>,

    /// Usage information
    pub usage: Option<InteractionsUsage>,

    /// Previous interaction ID for conversation threading
    pub previous_interaction_id: Option<String>,
}
184
185impl Interaction {
186    /// Check if the interaction is complete
187    pub fn is_complete(&self) -> bool {
188        matches!(self.status, InteractionsStatus::Completed)
189    }
190
191    /// Check if the interaction is in progress
192    pub fn is_in_progress(&self) -> bool {
193        matches!(self.status, InteractionsStatus::InProgress)
194    }
195
196    /// Check if the interaction failed
197    pub fn is_failed(&self) -> bool {
198        matches!(self.status, InteractionsStatus::Failed)
199    }
200
201    /// Check if the interaction requires action (tool execution)
202    pub fn requires_action(&self) -> bool {
203        matches!(self.status, InteractionsStatus::RequiresAction)
204    }
205}
206
207// ============================================================================
208// Streaming Event Types (SSE)
209// ============================================================================
210
/// Server-Sent Event for Interactions API streaming.
/// The JSON `event_type` field selects the variant (internally tagged).
/// Every variant carries an optional `event_id` so a dropped stream can be
/// resumed via `last_event_id` (see `InteractionsGetParams`).
/// See: https://ai.google.dev/api/interactions-api#streaming
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "event_type")]
pub enum InteractionStreamEvent {
    /// Emitted when an interaction begins processing
    #[serde(rename = "interaction.start")]
    InteractionStart {
        /// The interaction object
        interaction: Option<Interaction>,
        /// Event ID for resuming streams
        event_id: Option<String>,
    },

    /// Emitted when an interaction completes
    #[serde(rename = "interaction.complete")]
    InteractionComplete {
        /// The interaction object
        interaction: Option<Interaction>,
        /// Event ID for resuming streams
        event_id: Option<String>,
    },

    /// Emitted when interaction status changes
    #[serde(rename = "interaction.status_update")]
    InteractionStatusUpdate {
        /// The interaction ID
        interaction_id: Option<String>,
        /// The new status
        status: Option<InteractionsStatus>,
        /// Event ID for resuming streams
        event_id: Option<String>,
    },

    /// Signals the beginning of a new content block
    #[serde(rename = "content.start")]
    ContentStart {
        /// Content block index in outputs array
        index: Option<u32>,
        /// The content object
        content: Option<Content>,
        /// Event ID for resuming streams
        event_id: Option<String>,
    },

    /// Streams incremental content updates
    #[serde(rename = "content.delta")]
    ContentDelta {
        /// Content block index in outputs array
        index: Option<u32>,
        /// Event ID for resuming streams
        event_id: Option<String>,
        /// The delta content
        delta: Option<Delta>,
    },

    /// Marks the end of a content block
    #[serde(rename = "content.stop")]
    ContentStop {
        /// Content block index in outputs array
        index: Option<u32>,
        /// Event ID for resuming streams
        event_id: Option<String>,
    },

    /// Error event
    #[serde(rename = "error")]
    Error {
        /// Error information
        error: Option<InteractionsError>,
        /// Event ID for resuming streams
        event_id: Option<String>,
    },
}
286
/// Delta content for streaming updates.
/// Variants mirror the `Content` enum but with every field optional, since a
/// delta may carry only the changed portion of a block. Tagged by `type`.
/// See: https://ai.google.dev/api/interactions-api#ContentDelta
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Delta {
    /// Text delta
    Text {
        text: Option<String>,
        annotations: Option<Vec<Annotation>>,
    },
    /// Image delta (inline `data` or a `uri` reference — presumably
    /// base64 for inline data; confirm against the API reference)
    Image {
        data: Option<String>,
        uri: Option<String>,
        mime_type: Option<ImageMimeType>,
        resolution: Option<MediaResolution>,
    },
    /// Audio delta
    Audio {
        data: Option<String>,
        uri: Option<String>,
        mime_type: Option<AudioMimeType>,
    },
    /// Document delta
    Document {
        data: Option<String>,
        uri: Option<String>,
        mime_type: Option<DocumentMimeType>,
    },
    /// Video delta
    Video {
        data: Option<String>,
        uri: Option<String>,
        mime_type: Option<VideoMimeType>,
        resolution: Option<MediaResolution>,
    },
    /// Thought summary delta
    ThoughtSummary {
        content: Option<ThoughtSummaryContent>,
    },
    /// Thought signature delta
    ThoughtSignature { signature: Option<String> },
    /// Function call delta (`arguments` streams as a string fragment here,
    /// unlike the complete `Content::FunctionCall` which holds a JSON value)
    FunctionCall {
        name: Option<String>,
        arguments: Option<String>,
        id: Option<String>,
    },
    /// Function result delta
    FunctionResult {
        name: Option<String>,
        is_error: Option<bool>,
        result: Option<Value>,
        call_id: Option<String>,
    },
    /// Code execution call delta
    CodeExecutionCall {
        arguments: Option<CodeExecutionArguments>,
        id: Option<String>,
    },
    /// Code execution result delta
    CodeExecutionResult {
        result: Option<String>,
        is_error: Option<bool>,
        signature: Option<String>,
        call_id: Option<String>,
    },
    /// URL context call delta
    UrlContextCall {
        arguments: Option<UrlContextArguments>,
        id: Option<String>,
    },
    /// URL context result delta
    UrlContextResult {
        signature: Option<String>,
        result: Option<Vec<UrlContextResultData>>,
        is_error: Option<bool>,
        call_id: Option<String>,
    },
    /// Google search call delta
    GoogleSearchCall {
        arguments: Option<GoogleSearchArguments>,
        id: Option<String>,
    },
    /// Google search result delta
    GoogleSearchResult {
        signature: Option<String>,
        result: Option<Vec<GoogleSearchResultData>>,
        is_error: Option<bool>,
        call_id: Option<String>,
    },
    /// File search call delta
    FileSearchCall { id: Option<String> },
    /// File search result delta
    FileSearchResult {
        result: Option<Vec<FileSearchResultData>>,
    },
    /// MCP server tool call delta
    McpServerToolCall {
        name: Option<String>,
        server_name: Option<String>,
        arguments: Option<Value>,
        id: Option<String>,
    },
    /// MCP server tool result delta
    McpServerToolResult {
        name: Option<String>,
        server_name: Option<String>,
        result: Option<Value>,
        call_id: Option<String>,
    },
}
400
/// Error information carried by the `error` streaming event.
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct InteractionsError {
    /// Error code
    pub code: Option<String>,
    /// Human-readable error message
    pub message: Option<String>,
}
410
411// ============================================================================
412// Query Parameters
413// ============================================================================
414
/// Query parameters for GET /interactions/{id}
#[skip_serializing_none]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct InteractionsGetParams {
    /// Whether to stream the response
    pub stream: Option<bool>,
    /// Last event ID for resuming a stream (pairs with the `event_id`
    /// carried by every `InteractionStreamEvent` variant)
    pub last_event_id: Option<String>,
    /// API version
    pub api_version: Option<String>,
}
426
/// Query parameters for DELETE /interactions/{id}
#[skip_serializing_none]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct InteractionsDeleteParams {
    /// API version
    pub api_version: Option<String>,
}
434
/// Query parameters for POST /interactions/{id}/cancel
#[skip_serializing_none]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct InteractionsCancelParams {
    /// API version
    pub api_version: Option<String>,
}
442
443// ============================================================================
444// Interaction Tools
445// ============================================================================
446
/// Interaction tool types, tagged by `type` in snake_case (e.g.
/// `"google_search"`). Built-in tools with no configuration serialize as
/// empty struct variants.
/// See: https://ai.google.dev/api/interactions-api#Resource:Tool
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InteractionsTool {
    /// Function tool with function declaration
    Function(Function),
    /// Google Search built-in tool
    GoogleSearch {},
    /// Code Execution built-in tool
    CodeExecution {},
    /// URL Context built-in tool
    UrlContext {},
    /// MCP Server tool
    McpServer {
        name: Option<String>,
        url: Option<String>,
        // Extra HTTP headers sent to the MCP server (e.g. auth).
        headers: Option<HashMap<String, String>>,
        // Restricts which of the server's tools may be invoked.
        allowed_tools: Option<AllowedTools>,
    },
    /// File Search built-in tool
    FileSearch {
        /// Names of file search stores to search
        file_search_store_names: Option<Vec<String>>,
        /// Maximum number of results to return
        top_k: Option<u32>,
        /// Metadata filter for search
        metadata_filter: Option<String>,
    },
}
478
/// Allowed tools configuration for MCP server.
/// Also reused inside `ToolChoiceConfig` for generation-time tool choice.
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct AllowedTools {
    /// Tool choice mode: auto, any, none, or validated
    pub mode: Option<ToolChoiceType>,
    /// List of allowed tool names
    pub tools: Option<Vec<String>>,
}

/// Tool choice mode, serialized in snake_case ("auto", "any", ...).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ToolChoiceType {
    Auto,
    Any,
    None,
    Validated,
}
497
498// ============================================================================
499// Generation Config (Gemini-specific)
500// ============================================================================
501
/// Gemini-specific generation configuration. Field validators run when the
/// parent `InteractionsRequest` is validated (via `#[validate(nested)]`).
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize, Validate)]
pub struct GenerationConfig {
    // Sampling temperature; validated to [0.0, 2.0].
    #[validate(range(min = 0.0, max = 2.0))]
    pub temperature: Option<f32>,

    // Nucleus sampling probability; range checked by the shared helper.
    #[validate(custom(function = "validate_top_p_value"))]
    pub top_p: Option<f32>,

    // Seed for reproducible sampling.
    pub seed: Option<i64>,

    // Sequences that stop generation when produced.
    #[validate(custom(function = "validate_stop_sequences"))]
    pub stop_sequences: Option<Vec<String>>,

    // Tool selection mode or detailed config (see `ToolChoice`).
    pub tool_choice: Option<ToolChoice>,

    // Reasoning effort level.
    pub thinking_level: Option<ThinkingLevel>,

    // Whether thought summaries are included ("auto" or "none").
    pub thinking_summaries: Option<ThinkingSummaries>,

    // Output token cap; must be at least 1.
    #[validate(range(min = 1))]
    pub max_output_tokens: Option<u32>,

    // Voice settings for audio output.
    pub speech_config: Option<Vec<SpeechConfig>>,

    // Aspect-ratio / size settings for image output.
    pub image_config: Option<ImageConfig>,
}
529
/// Reasoning effort level, serialized in snake_case.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ThinkingLevel {
    Minimal,
    Low,
    Medium,
    High,
}

/// Whether thought summaries are emitted, serialized in snake_case.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ThinkingSummaries {
    Auto,
    None,
}
545
/// Tool choice can be a simple mode or a detailed config.
/// Untagged: deserialization tries `ToolChoiceType` (a bare string) first,
/// then falls back to the `ToolChoiceConfig` object form.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum ToolChoice {
    Type(ToolChoiceType),
    Config(ToolChoiceConfig),
}

/// Object form of tool choice carrying an allowed-tools restriction.
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ToolChoiceConfig {
    pub allowed_tools: Option<AllowedTools>,
}
559
/// Voice settings for audio output.
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SpeechConfig {
    // Voice name to synthesize with.
    pub voice: Option<String>,
    // Output language.
    pub language: Option<String>,
    // Speaker label, for multi-speaker audio.
    pub speaker: Option<String>,
}

/// Aspect-ratio and size settings for image output.
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ImageConfig {
    pub aspect_ratio: Option<AspectRatio>,
    pub image_size: Option<ImageSize>,
}
574
/// Image aspect ratio; serialized as the literal ratio string (e.g. "16:9").
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub enum AspectRatio {
    #[serde(rename = "1:1")]
    Square,
    #[serde(rename = "2:3")]
    Portrait2x3,
    #[serde(rename = "3:2")]
    Landscape3x2,
    #[serde(rename = "3:4")]
    Portrait3x4,
    #[serde(rename = "4:3")]
    Landscape4x3,
    #[serde(rename = "4:5")]
    Portrait4x5,
    #[serde(rename = "5:4")]
    Landscape5x4,
    #[serde(rename = "9:16")]
    Portrait9x16,
    #[serde(rename = "16:9")]
    Landscape16x9,
    #[serde(rename = "21:9")]
    UltraWide,
}

/// Output image resolution tier; serialized as "1K"/"2K"/"4K".
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub enum ImageSize {
    #[serde(rename = "1K")]
    OneK,
    #[serde(rename = "2K")]
    TwoK,
    #[serde(rename = "4K")]
    FourK,
}
608
/// Agent configuration, tagged by `type`.
/// Note the Deep Research variant uses a kebab-case tag ("deep-research"),
/// overriding the enum-level snake_case rename.
/// See: https://ai.google.dev/api/interactions-api#CreateInteraction-deep_research
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum AgentConfig {
    /// Dynamic agent configuration
    Dynamic {},
    /// Deep Research agent configuration
    #[serde(rename = "deep-research")]
    DeepResearch {
        /// Whether to include thought summaries ("auto" or "none");
        /// skipped manually since this enum has no `skip_serializing_none`
        #[serde(skip_serializing_if = "Option::is_none")]
        thinking_summaries: Option<ThinkingSummaries>,
    },
}
624
625// ============================================================================
626// Input/Output Types
627// ============================================================================
628
/// Input can be Content, array of Content, array of Turn, or string.
/// Untagged: serde tries the variants in declaration order, so `Text` wins
/// for bare strings and `Turns` is only reached when the array elements do
/// not parse as `Content` — keep this ordering when editing.
/// See: https://ai.google.dev/api/interactions-api#request-body
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum InteractionsInput {
    /// Simple text input
    Text(String),
    /// Single content block
    Content(Content),
    /// Array of content blocks
    Contents(Vec<Content>),
    /// Array of turns (conversation history)
    Turns(Vec<Turn>),
}
643
/// A turn in a conversation with role and content
/// See: https://ai.google.dev/api/interactions-api#Resource:Turn
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Turn {
    /// Role: "user" or "model"
    pub role: Option<String>,
    /// Content can be array of Content or string
    pub content: Option<TurnContent>,
}

/// Turn content can be array of Content or a simple string.
/// Untagged: the array form is tried first, then the plain string.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum TurnContent {
    Contents(Vec<Content>),
    Text(String),
}
662
/// Content is a polymorphic type representing different content types,
/// tagged by `type` in snake_case. Call variants (model-issued) carry `id`;
/// result variants (client- or server-produced) echo it back as `call_id`.
/// Unlike the streaming `Delta` mirror, some fields here are required.
/// See: https://ai.google.dev/api/interactions-api#Resource:Content
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Content {
    /// Text content
    Text {
        text: Option<String>,
        annotations: Option<Vec<Annotation>>,
    },

    /// Image content (inline `data` — presumably base64; confirm against
    /// the API reference — or a `uri` reference)
    Image {
        data: Option<String>,
        uri: Option<String>,
        mime_type: Option<ImageMimeType>,
        resolution: Option<MediaResolution>,
    },

    /// Audio content
    Audio {
        data: Option<String>,
        uri: Option<String>,
        mime_type: Option<AudioMimeType>,
    },

    /// Document content (PDF)
    Document {
        data: Option<String>,
        uri: Option<String>,
        mime_type: Option<DocumentMimeType>,
    },

    /// Video content
    Video {
        data: Option<String>,
        uri: Option<String>,
        mime_type: Option<VideoMimeType>,
        resolution: Option<MediaResolution>,
    },

    /// Thought content
    Thought {
        signature: Option<String>,
        summary: Option<Vec<ThoughtSummaryContent>>,
    },

    /// Function call content; `name`, `arguments`, and `id` are required
    FunctionCall {
        name: String,
        arguments: Value,
        id: String,
    },

    /// Function result content; `result` and `call_id` are required
    FunctionResult {
        name: Option<String>,
        is_error: Option<bool>,
        result: Value,
        call_id: String,
    },

    /// Code execution call content
    CodeExecutionCall {
        arguments: Option<CodeExecutionArguments>,
        id: Option<String>,
    },

    /// Code execution result content
    CodeExecutionResult {
        result: Option<String>,
        is_error: Option<bool>,
        signature: Option<String>,
        call_id: Option<String>,
    },

    /// URL context call content
    UrlContextCall {
        arguments: Option<UrlContextArguments>,
        id: Option<String>,
    },

    /// URL context result content
    UrlContextResult {
        signature: Option<String>,
        result: Option<Vec<UrlContextResultData>>,
        is_error: Option<bool>,
        call_id: Option<String>,
    },

    /// Google search call content
    GoogleSearchCall {
        arguments: Option<GoogleSearchArguments>,
        id: Option<String>,
    },

    /// Google search result content
    GoogleSearchResult {
        signature: Option<String>,
        result: Option<Vec<GoogleSearchResultData>>,
        is_error: Option<bool>,
        call_id: Option<String>,
    },

    /// File search call content
    FileSearchCall { id: Option<String> },

    /// File search result content
    FileSearchResult {
        result: Option<Vec<FileSearchResultData>>,
    },

    /// MCP server tool call content; all fields required
    McpServerToolCall {
        name: String,
        server_name: String,
        arguments: Value,
        id: String,
    },

    /// MCP server tool result content
    McpServerToolResult {
        name: Option<String>,
        server_name: Option<String>,
        result: Value,
        call_id: String,
    },
}
792
/// Content types allowed in thought summary (text or image only).
/// A restricted mirror of the `Text`/`Image` variants of `Content`.
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ThoughtSummaryContent {
    /// Text content in thought summary
    Text {
        text: Option<String>,
        annotations: Option<Vec<Annotation>>,
    },
    /// Image content in thought summary
    Image {
        data: Option<String>,
        uri: Option<String>,
        mime_type: Option<ImageMimeType>,
        resolution: Option<MediaResolution>,
    },
}
811
/// Annotation for text content (citations).
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Annotation {
    /// Start of the attributed segment, measured in bytes
    pub start_index: Option<u32>,
    /// End of the attributed segment, exclusive
    pub end_index: Option<u32>,
    /// Source attributed for a portion of the text (URL, title, or other identifier)
    pub source: Option<String>,
}
823
/// Arguments for URL context call
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct UrlContextArguments {
    /// The URLs to fetch
    pub urls: Option<Vec<String>>,
}

/// Result data for URL context result (one entry per requested URL)
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct UrlContextResultData {
    /// The URL that was fetched
    pub url: Option<String>,
    /// The status of the URL retrieval
    pub status: Option<UrlContextStatus>,
}

/// Status of URL context retrieval, serialized in snake_case
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum UrlContextStatus {
    Success,
    Error,
    Paywall,
    Unsafe,
}
851
/// Arguments for Google search call
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GoogleSearchArguments {
    /// Web search queries
    pub queries: Option<Vec<String>>,
}

/// Result data for Google search result
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GoogleSearchResultData {
    /// URI reference of the search result
    pub url: Option<String>,
    /// Title of the search result
    pub title: Option<String>,
    /// Web content snippet
    pub rendered_content: Option<String>,
}
871
/// Result data for file search result
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FileSearchResultData {
    /// Search result title
    pub title: Option<String>,
    /// Search result text
    pub text: Option<String>,
    /// Name of the file search store
    pub file_search_store: Option<String>,
}
883
/// Arguments for code execution call
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CodeExecutionArguments {
    /// Programming language (currently only Python is supported)
    pub language: Option<CodeExecutionLanguage>,
    /// The code to be executed
    pub code: Option<String>,
}

/// Supported languages for code execution, serialized in snake_case
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum CodeExecutionLanguage {
    Python,
}
900
/// Image/video resolution options, serialized in snake_case
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum MediaResolution {
    Low,
    Medium,
    High,
    UltraHigh,
}
910
911/// Supported image MIME types
912#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
913pub enum ImageMimeType {
914    #[serde(rename = "image/png")]
915    Png,
916    #[serde(rename = "image/jpeg")]
917    Jpeg,
918    #[serde(rename = "image/webp")]
919    Webp,
920    #[serde(rename = "image/heic")]
921    Heic,
922    #[serde(rename = "image/heif")]
923    Heif,
924}
925
926impl ImageMimeType {
927    pub fn as_str(&self) -> &'static str {
928        match self {
929            ImageMimeType::Png => "image/png",
930            ImageMimeType::Jpeg => "image/jpeg",
931            ImageMimeType::Webp => "image/webp",
932            ImageMimeType::Heic => "image/heic",
933            ImageMimeType::Heif => "image/heif",
934        }
935    }
936}
937
938/// Supported audio MIME types
939#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
940pub enum AudioMimeType {
941    #[serde(rename = "audio/wav")]
942    Wav,
943    #[serde(rename = "audio/mp3")]
944    Mp3,
945    #[serde(rename = "audio/aiff")]
946    Aiff,
947    #[serde(rename = "audio/aac")]
948    Aac,
949    #[serde(rename = "audio/ogg")]
950    Ogg,
951    #[serde(rename = "audio/flac")]
952    Flac,
953}
954
955impl AudioMimeType {
956    pub fn as_str(&self) -> &'static str {
957        match self {
958            AudioMimeType::Wav => "audio/wav",
959            AudioMimeType::Mp3 => "audio/mp3",
960            AudioMimeType::Aiff => "audio/aiff",
961            AudioMimeType::Aac => "audio/aac",
962            AudioMimeType::Ogg => "audio/ogg",
963            AudioMimeType::Flac => "audio/flac",
964        }
965    }
966}
967
968/// Supported document MIME types
969#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
970pub enum DocumentMimeType {
971    #[serde(rename = "application/pdf")]
972    Pdf,
973}
974
975impl DocumentMimeType {
976    pub fn as_str(&self) -> &'static str {
977        match self {
978            DocumentMimeType::Pdf => "application/pdf",
979        }
980    }
981}
982
983/// Supported video MIME types
984#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
985pub enum VideoMimeType {
986    #[serde(rename = "video/mp4")]
987    Mp4,
988    #[serde(rename = "video/mpeg")]
989    Mpeg,
990    #[serde(rename = "video/mov")]
991    Mov,
992    #[serde(rename = "video/avi")]
993    Avi,
994    #[serde(rename = "video/x-flv")]
995    Flv,
996    #[serde(rename = "video/mpg")]
997    Mpg,
998    #[serde(rename = "video/webm")]
999    Webm,
1000    #[serde(rename = "video/wmv")]
1001    Wmv,
1002    #[serde(rename = "video/3gpp")]
1003    ThreeGpp,
1004}
1005
1006impl VideoMimeType {
1007    pub fn as_str(&self) -> &'static str {
1008        match self {
1009            VideoMimeType::Mp4 => "video/mp4",
1010            VideoMimeType::Mpeg => "video/mpeg",
1011            VideoMimeType::Mov => "video/mov",
1012            VideoMimeType::Avi => "video/avi",
1013            VideoMimeType::Flv => "video/x-flv",
1014            VideoMimeType::Mpg => "video/mpg",
1015            VideoMimeType::Webm => "video/webm",
1016            VideoMimeType::Wmv => "video/wmv",
1017            VideoMimeType::ThreeGpp => "video/3gpp",
1018        }
1019    }
1020}
1021
1022// ============================================================================
1023// Status Types
1024// ============================================================================
1025
/// Interaction lifecycle status, serialized in snake_case
/// ("in_progress", "requires_action", ...). Defaults to `InProgress`.
#[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum InteractionsStatus {
    #[default]
    InProgress,
    RequiresAction,
    Completed,
    Failed,
    Cancelled,
}
1036
1037// ============================================================================
1038// Usage Types
1039// ============================================================================
1040
/// Token count by modality
///
/// `None` fields are omitted from serialization (`skip_serializing_none`).
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ModalityTokens {
    /// Modality these tokens are attributed to (text/image/audio).
    pub modality: Option<ResponseModality>,
    /// Number of tokens counted for that modality.
    pub tokens: Option<u32>,
}
1048
/// Token usage accounting for an interaction.
///
/// Each `total_*` counter is optionally accompanied by a per-modality
/// breakdown. All fields are optional and omitted from serialization when
/// `None` (`skip_serializing_none`).
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct InteractionsUsage {
    pub total_input_tokens: Option<u32>,
    pub input_tokens_by_modality: Option<Vec<ModalityTokens>>,
    pub total_cached_tokens: Option<u32>,
    pub cached_tokens_by_modality: Option<Vec<ModalityTokens>>,
    pub total_output_tokens: Option<u32>,
    pub output_tokens_by_modality: Option<Vec<ModalityTokens>>,
    pub total_tool_use_tokens: Option<u32>,
    pub tool_use_tokens_by_modality: Option<Vec<ModalityTokens>>,
    pub total_thought_tokens: Option<u32>,
    // Grand total; relationship to the other counters is defined by the API,
    // not enforced here.
    pub total_tokens: Option<u32>,
}
1063
/// Modality of response content, used for per-modality token accounting.
///
/// Serialized in snake_case (`text`, `image`, `audio`).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ResponseModality {
    Text,
    Image,
    Audio,
}
1071
/// Returns `true` when the value is absent or contains only whitespace.
///
/// Generalized from `Option<&String>` to any string-like payload
/// (`&String`, `&str`, `String`, ...); existing `opt.as_ref()` call sites
/// (which yield `Option<&String>`) are unaffected since `&String: AsRef<str>`.
fn is_option_blank(v: Option<impl AsRef<str>>) -> bool {
    v.map_or(true, |s| s.as_ref().trim().is_empty())
}
1075
1076fn validate_interactions_request(req: &InteractionsRequest) -> Result<(), ValidationError> {
1077    // Exactly one of model or agent must be provided
1078    if is_option_blank(req.model.as_ref()) && is_option_blank(req.agent.as_ref()) {
1079        return Err(ValidationError::new("model_or_agent_required"));
1080    }
1081    if !is_option_blank(req.model.as_ref()) && !is_option_blank(req.agent.as_ref()) {
1082        let mut e = ValidationError::new("model_and_agent_mutually_exclusive");
1083        e.message = Some("Cannot set both model and agent. Provide exactly one.".into());
1084        return Err(e);
1085    }
1086
1087    // response_mime_type is required when response_format is set
1088    if req.response_format.is_some() && is_option_blank(req.response_mime_type.as_ref()) {
1089        return Err(ValidationError::new("response_mime_type_required"));
1090    }
1091
1092    // background mode is required for agent interactions, and only for agents
1093    if !is_option_blank(req.agent.as_ref()) && !req.background {
1094        let mut e = ValidationError::new("agent_requires_background");
1095        e.message = Some("Agent interactions require background mode to be enabled.".into());
1096        return Err(e);
1097    }
1098    if !is_option_blank(req.model.as_ref()) && req.background {
1099        let mut e = ValidationError::new("background_requires_agent");
1100        e.message = Some("Background mode is only supported for agent interactions.".into());
1101        return Err(e);
1102    }
1103
1104    // background and stream are mutually exclusive
1105    if req.background && req.stream {
1106        let mut e = ValidationError::new("background_conflicts_with_stream");
1107        e.message = Some("Cannot set both background and stream to true.".into());
1108        return Err(e);
1109    }
1110    Ok(())
1111}
1112
1113fn validate_tools(tools: &[InteractionsTool]) -> Result<(), ValidationError> {
1114    // FileSearch tool is not supported yet
1115    if tools
1116        .iter()
1117        .any(|t| matches!(t, InteractionsTool::FileSearch { .. }))
1118    {
1119        return Err(ValidationError::new("file_search_tool_not_supported"));
1120    }
1121    Ok(())
1122}
1123
1124fn validate_input(input: &InteractionsInput) -> Result<(), ValidationError> {
1125    // Reject empty input
1126    let empty_msg = match input {
1127        InteractionsInput::Text(s) if s.trim().is_empty() => Some("Input text cannot be empty"),
1128        InteractionsInput::Content(content) if is_content_empty(content) => {
1129            Some("Input content cannot be empty")
1130        }
1131        InteractionsInput::Contents(contents) if contents.is_empty() => {
1132            Some("Input content array cannot be empty")
1133        }
1134        InteractionsInput::Contents(contents) if contents.iter().any(is_content_empty) => {
1135            Some("Input content array contains empty content items")
1136        }
1137        InteractionsInput::Turns(turns) if turns.is_empty() => {
1138            Some("Input turns array cannot be empty")
1139        }
1140        InteractionsInput::Turns(turns) if turns.iter().any(is_turn_empty) => {
1141            Some("Input turns array contains empty turn items")
1142        }
1143        _ => None,
1144    };
1145    if let Some(msg) = empty_msg {
1146        let mut e = ValidationError::new("input_cannot_be_empty");
1147        e.message = Some(msg.into());
1148        return Err(e);
1149    }
1150
1151    // Reject unsupported file search content
1152    fn has_file_search_content(content: &Content) -> bool {
1153        matches!(
1154            content,
1155            Content::FileSearchCall { .. } | Content::FileSearchResult { .. }
1156        )
1157    }
1158
1159    fn check_turn(turn: &Turn) -> bool {
1160        if let Some(content) = &turn.content {
1161            match content {
1162                TurnContent::Contents(contents) => contents.iter().any(has_file_search_content),
1163                TurnContent::Text(_) => false,
1164            }
1165        } else {
1166            false
1167        }
1168    }
1169
1170    let has_file_search = match input {
1171        InteractionsInput::Text(_) => false,
1172        InteractionsInput::Content(content) => has_file_search_content(content),
1173        InteractionsInput::Contents(contents) => contents.iter().any(has_file_search_content),
1174        InteractionsInput::Turns(turns) => turns.iter().any(check_turn),
1175    };
1176
1177    if has_file_search {
1178        return Err(ValidationError::new("file_search_content_not_supported"));
1179    }
1180    Ok(())
1181}
1182
1183fn is_content_empty(content: &Content) -> bool {
1184    match content {
1185        Content::Text { text, .. } => is_option_blank(text.as_ref()),
1186        Content::Image { data, uri, .. }
1187        | Content::Audio { data, uri, .. }
1188        | Content::Document { data, uri, .. }
1189        | Content::Video { data, uri, .. } => {
1190            is_option_blank(data.as_ref()) && is_option_blank(uri.as_ref())
1191        }
1192        Content::CodeExecutionCall { id, .. }
1193        | Content::UrlContextCall { id, .. }
1194        | Content::GoogleSearchCall { id, .. } => is_option_blank(id.as_ref()),
1195        Content::CodeExecutionResult { call_id, .. }
1196        | Content::UrlContextResult { call_id, .. }
1197        | Content::GoogleSearchResult { call_id, .. } => is_option_blank(call_id.as_ref()),
1198        _ => false,
1199    }
1200}
1201
1202fn is_turn_empty(turn: &Turn) -> bool {
1203    match &turn.content {
1204        None => true,
1205        Some(TurnContent::Text(s)) => s.trim().is_empty(),
1206        Some(TurnContent::Contents(contents)) => {
1207            contents.is_empty() || contents.iter().any(is_content_empty)
1208        }
1209    }
1210}
1211
1212fn validate_stop_sequences(seqs: &[String]) -> Result<(), ValidationError> {
1213    if seqs.len() > 5 {
1214        let mut e = ValidationError::new("too_many_stop_sequences");
1215        e.message = Some("Maximum 5 stop sequences allowed".into());
1216        return Err(e);
1217    }
1218    if seqs.iter().any(|s| s.trim().is_empty()) {
1219        let mut e = ValidationError::new("stop_sequences_cannot_be_empty");
1220        e.message = Some("Stop sequences cannot contain empty strings".into());
1221        return Err(e);
1222    }
1223    Ok(())
1224}