// openai_protocol/interactions.rs
//
// Gemini Interactions API types
// https://ai.google.dev/gemini-api/docs/interactions
3
use std::collections::HashMap;

use serde::{Deserialize, Serialize};
use serde_json::Value;
use serde_with::skip_serializing_none;
use validator::{Validate, ValidationError};

use super::{
    common::{default_model, default_true, Function, GenerationRequest},
    sampling_params::validate_top_p_value,
    validated::Normalizable,
};
16
// ============================================================================
// Request Type
// ============================================================================
20
21#[skip_serializing_none]
22#[derive(Debug, Clone, Deserialize, Serialize, Validate)]
23#[validate(schema(function = "validate_interactions_request"))]
24pub struct InteractionsRequest {
25    /// Model identifier (e.g., "gemini-2.5-flash")
26    /// Required if agent is not provided
27    pub model: Option<String>,
28
29    /// Agent name (e.g., "deep-research-pro-preview-12-2025")
30    /// Required if model is not provided
31    pub agent: Option<String>,
32
33    /// Input content - can be string or array of Content objects
34    #[validate(custom(function = "validate_input"))]
35    pub input: InteractionsInput,
36
37    /// System instruction for the model
38    pub system_instruction: Option<String>,
39
40    /// Available tools
41    #[validate(custom(function = "validate_tools"))]
42    pub tools: Option<Vec<InteractionsTool>>,
43
44    /// Response format for structured outputs
45    pub response_format: Option<Value>,
46
47    /// MIME type for the response (required if response_format is set)
48    pub response_mime_type: Option<String>,
49
50    /// Whether to stream the response
51    #[serde(default)]
52    pub stream: bool,
53
54    /// Whether to store the interaction (default: true)
55    #[serde(default = "default_true")]
56    pub store: bool,
57
58    /// Run request in background (agents only)
59    #[serde(default)]
60    pub background: bool,
61
62    /// Generation configuration
63    #[validate(nested)]
64    pub generation_config: Option<GenerationConfig>,
65
66    /// Agent configuration (only applicable when agent is specified)
67    pub agent_config: Option<AgentConfig>,
68
69    /// Response modalities (text, image, audio)
70    pub response_modalities: Option<Vec<ResponseModality>>,
71
72    /// Link to prior interaction for stateful conversations
73    pub previous_interaction_id: Option<String>,
74}
75
76impl Default for InteractionsRequest {
77    fn default() -> Self {
78        Self {
79            model: Some(default_model()),
80            agent: None,
81            agent_config: None,
82            input: InteractionsInput::Text(String::new()),
83            system_instruction: None,
84            previous_interaction_id: None,
85            tools: None,
86            generation_config: None,
87            response_format: None,
88            response_mime_type: None,
89            response_modalities: None,
90            stream: false,
91            background: false,
92            store: true,
93        }
94    }
95}
96
97impl Normalizable for InteractionsRequest {
98    // Use default no-op implementation
99}
100
101impl GenerationRequest for InteractionsRequest {
102    fn is_stream(&self) -> bool {
103        self.stream
104    }
105
106    fn get_model(&self) -> Option<&str> {
107        self.model.as_deref()
108    }
109
110    fn extract_text_for_routing(&self) -> String {
111        fn extract_from_content(content: &Content) -> Option<String> {
112            match content {
113                Content::Text { text, .. } => text.clone(),
114                _ => None,
115            }
116        }
117
118        fn extract_from_turn(turn: &Turn) -> String {
119            match &turn.content {
120                Some(TurnContent::Text(text)) => text.clone(),
121                Some(TurnContent::Contents(contents)) => contents
122                    .iter()
123                    .filter_map(extract_from_content)
124                    .collect::<Vec<String>>()
125                    .join(" "),
126                None => String::new(),
127            }
128        }
129
130        match &self.input {
131            InteractionsInput::Text(text) => text.clone(),
132            InteractionsInput::Content(content) => {
133                extract_from_content(content).unwrap_or_default()
134            }
135            InteractionsInput::Contents(contents) => contents
136                .iter()
137                .filter_map(extract_from_content)
138                .collect::<Vec<String>>()
139                .join(" "),
140            InteractionsInput::Turns(turns) => turns
141                .iter()
142                .map(extract_from_turn)
143                .collect::<Vec<String>>()
144                .join(" "),
145        }
146    }
147}
148
// ============================================================================
// Response Type
// ============================================================================
152
153#[skip_serializing_none]
154#[derive(Debug, Clone, Default, Deserialize, Serialize)]
155pub struct Interaction {
156    /// Object type, always "interaction"
157    pub object: Option<String>,
158
159    /// Model used
160    pub model: Option<String>,
161
162    /// Agent used
163    pub agent: Option<String>,
164
165    /// Interaction ID
166    pub id: String,
167
168    /// Interaction status
169    pub status: InteractionsStatus,
170
171    /// Creation timestamp (ISO 8601)
172    pub created: Option<String>,
173
174    /// Last update timestamp (ISO 8601)
175    pub updated: Option<String>,
176
177    /// Role of the interaction
178    pub role: Option<String>,
179
180    /// Output content
181    pub outputs: Option<Vec<Content>>,
182
183    /// Usage information
184    pub usage: Option<InteractionsUsage>,
185
186    /// Previous interaction ID for conversation threading
187    pub previous_interaction_id: Option<String>,
188}
189
190impl Interaction {
191    /// Check if the interaction is complete
192    pub fn is_complete(&self) -> bool {
193        matches!(self.status, InteractionsStatus::Completed)
194    }
195
196    /// Check if the interaction is in progress
197    pub fn is_in_progress(&self) -> bool {
198        matches!(self.status, InteractionsStatus::InProgress)
199    }
200
201    /// Check if the interaction failed
202    pub fn is_failed(&self) -> bool {
203        matches!(self.status, InteractionsStatus::Failed)
204    }
205
206    /// Check if the interaction requires action (tool execution)
207    pub fn requires_action(&self) -> bool {
208        matches!(self.status, InteractionsStatus::RequiresAction)
209    }
210}
211
// ============================================================================
// Streaming Event Types (SSE)
// ============================================================================
215
216/// Server-Sent Event for Interactions API streaming
217/// See: https://ai.google.dev/api/interactions-api#streaming
218#[skip_serializing_none]
219#[derive(Debug, Clone, Deserialize, Serialize)]
220#[serde(tag = "event_type")]
221pub enum InteractionStreamEvent {
222    /// Emitted when an interaction begins processing
223    #[serde(rename = "interaction.start")]
224    InteractionStart {
225        /// The interaction object
226        interaction: Option<Interaction>,
227        /// Event ID for resuming streams
228        event_id: Option<String>,
229    },
230
231    /// Emitted when an interaction completes
232    #[serde(rename = "interaction.complete")]
233    InteractionComplete {
234        /// The interaction object
235        interaction: Option<Interaction>,
236        /// Event ID for resuming streams
237        event_id: Option<String>,
238    },
239
240    /// Emitted when interaction status changes
241    #[serde(rename = "interaction.status_update")]
242    InteractionStatusUpdate {
243        /// The interaction ID
244        interaction_id: Option<String>,
245        /// The new status
246        status: Option<InteractionsStatus>,
247        /// Event ID for resuming streams
248        event_id: Option<String>,
249    },
250
251    /// Signals the beginning of a new content block
252    #[serde(rename = "content.start")]
253    ContentStart {
254        /// Content block index in outputs array
255        index: Option<u32>,
256        /// The content object
257        content: Option<Content>,
258        /// Event ID for resuming streams
259        event_id: Option<String>,
260    },
261
262    /// Streams incremental content updates
263    #[serde(rename = "content.delta")]
264    ContentDelta {
265        /// Content block index in outputs array
266        index: Option<u32>,
267        /// Event ID for resuming streams
268        event_id: Option<String>,
269        /// The delta content
270        delta: Option<Delta>,
271    },
272
273    /// Marks the end of a content block
274    #[serde(rename = "content.stop")]
275    ContentStop {
276        /// Content block index in outputs array
277        index: Option<u32>,
278        /// Event ID for resuming streams
279        event_id: Option<String>,
280    },
281
282    /// Error event
283    #[serde(rename = "error")]
284    Error {
285        /// Error information
286        error: Option<InteractionsError>,
287        /// Event ID for resuming streams
288        event_id: Option<String>,
289    },
290}
291
292/// Delta content for streaming updates
293/// See: https://ai.google.dev/api/interactions-api#ContentDelta
294#[skip_serializing_none]
295#[derive(Debug, Clone, Deserialize, Serialize)]
296#[serde(tag = "type", rename_all = "snake_case")]
297pub enum Delta {
298    /// Text delta
299    Text {
300        text: Option<String>,
301        annotations: Option<Vec<Annotation>>,
302    },
303    /// Image delta
304    Image {
305        data: Option<String>,
306        uri: Option<String>,
307        mime_type: Option<ImageMimeType>,
308        resolution: Option<MediaResolution>,
309    },
310    /// Audio delta
311    Audio {
312        data: Option<String>,
313        uri: Option<String>,
314        mime_type: Option<AudioMimeType>,
315    },
316    /// Document delta
317    Document {
318        data: Option<String>,
319        uri: Option<String>,
320        mime_type: Option<DocumentMimeType>,
321    },
322    /// Video delta
323    Video {
324        data: Option<String>,
325        uri: Option<String>,
326        mime_type: Option<VideoMimeType>,
327        resolution: Option<MediaResolution>,
328    },
329    /// Thought summary delta
330    ThoughtSummary {
331        content: Option<ThoughtSummaryContent>,
332    },
333    /// Thought signature delta
334    ThoughtSignature { signature: Option<String> },
335    /// Function call delta
336    FunctionCall {
337        name: Option<String>,
338        arguments: Option<String>,
339        id: Option<String>,
340    },
341    /// Function result delta
342    FunctionResult {
343        name: Option<String>,
344        is_error: Option<bool>,
345        result: Option<Value>,
346        call_id: Option<String>,
347    },
348    /// Code execution call delta
349    CodeExecutionCall {
350        arguments: Option<CodeExecutionArguments>,
351        id: Option<String>,
352    },
353    /// Code execution result delta
354    CodeExecutionResult {
355        result: Option<String>,
356        is_error: Option<bool>,
357        signature: Option<String>,
358        call_id: Option<String>,
359    },
360    /// URL context call delta
361    UrlContextCall {
362        arguments: Option<UrlContextArguments>,
363        id: Option<String>,
364    },
365    /// URL context result delta
366    UrlContextResult {
367        signature: Option<String>,
368        result: Option<Vec<UrlContextResultData>>,
369        is_error: Option<bool>,
370        call_id: Option<String>,
371    },
372    /// Google search call delta
373    GoogleSearchCall {
374        arguments: Option<GoogleSearchArguments>,
375        id: Option<String>,
376    },
377    /// Google search result delta
378    GoogleSearchResult {
379        signature: Option<String>,
380        result: Option<Vec<GoogleSearchResultData>>,
381        is_error: Option<bool>,
382        call_id: Option<String>,
383    },
384    /// File search call delta
385    FileSearchCall { id: Option<String> },
386    /// File search result delta
387    FileSearchResult {
388        result: Option<Vec<FileSearchResultData>>,
389    },
390    /// MCP server tool call delta
391    McpServerToolCall {
392        name: Option<String>,
393        server_name: Option<String>,
394        arguments: Option<Value>,
395        id: Option<String>,
396    },
397    /// MCP server tool result delta
398    McpServerToolResult {
399        name: Option<String>,
400        server_name: Option<String>,
401        result: Option<Value>,
402        call_id: Option<String>,
403    },
404}
405
406/// Error information in streaming events
407#[skip_serializing_none]
408#[derive(Debug, Clone, Deserialize, Serialize)]
409pub struct InteractionsError {
410    /// Error code
411    pub code: Option<String>,
412    /// Error message
413    pub message: Option<String>,
414}
415
// ============================================================================
// Query Parameters
// ============================================================================
419
420/// Query parameters for GET /interactions/{id}
421#[skip_serializing_none]
422#[derive(Debug, Clone, Default, Deserialize, Serialize)]
423pub struct InteractionsGetParams {
424    /// Whether to stream the response
425    pub stream: Option<bool>,
426    /// Last event ID for resuming a stream
427    pub last_event_id: Option<String>,
428    /// API version
429    pub api_version: Option<String>,
430}
431
432/// Query parameters for DELETE /interactions/{id}
433#[skip_serializing_none]
434#[derive(Debug, Clone, Default, Deserialize, Serialize)]
435pub struct InteractionsDeleteParams {
436    /// API version
437    pub api_version: Option<String>,
438}
439
440/// Query parameters for POST /interactions/{id}/cancel
441#[skip_serializing_none]
442#[derive(Debug, Clone, Default, Deserialize, Serialize)]
443pub struct InteractionsCancelParams {
444    /// API version
445    pub api_version: Option<String>,
446}
447
// ============================================================================
// Interaction Tools
// ============================================================================
451
452/// Interaction tool types
453/// See: https://ai.google.dev/api/interactions-api#Resource:Tool
454#[skip_serializing_none]
455#[derive(Debug, Clone, Deserialize, Serialize)]
456#[serde(tag = "type", rename_all = "snake_case")]
457pub enum InteractionsTool {
458    /// Function tool with function declaration
459    Function(Function),
460    /// Google Search built-in tool
461    GoogleSearch {},
462    /// Code Execution built-in tool
463    CodeExecution {},
464    /// URL Context built-in tool
465    UrlContext {},
466    /// MCP Server tool
467    McpServer {
468        name: Option<String>,
469        url: Option<String>,
470        headers: Option<HashMap<String, String>>,
471        allowed_tools: Option<AllowedTools>,
472    },
473    /// File Search built-in tool
474    FileSearch {
475        /// Names of file search stores to search
476        file_search_store_names: Option<Vec<String>>,
477        /// Maximum number of results to return
478        top_k: Option<u32>,
479        /// Metadata filter for search
480        metadata_filter: Option<String>,
481    },
482}
483
484/// Allowed tools configuration for MCP server
485#[skip_serializing_none]
486#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
487pub struct AllowedTools {
488    /// Tool choice mode: auto, any, none, or validated
489    pub mode: Option<ToolChoiceType>,
490    /// List of allowed tool names
491    pub tools: Option<Vec<String>>,
492}
493
494#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
495#[serde(rename_all = "snake_case")]
496pub enum ToolChoiceType {
497    Auto,
498    Any,
499    None,
500    Validated,
501}
502
// ============================================================================
// Generation Config (Gemini-specific)
// ============================================================================
506
507#[skip_serializing_none]
508#[derive(Debug, Clone, Deserialize, Serialize, Validate)]
509pub struct GenerationConfig {
510    #[validate(range(min = 0.0, max = 2.0))]
511    pub temperature: Option<f32>,
512
513    #[validate(custom(function = "validate_top_p_value"))]
514    pub top_p: Option<f32>,
515
516    pub seed: Option<i64>,
517
518    #[validate(custom(function = "validate_stop_sequences"))]
519    pub stop_sequences: Option<Vec<String>>,
520
521    pub tool_choice: Option<ToolChoice>,
522
523    pub thinking_level: Option<ThinkingLevel>,
524
525    pub thinking_summaries: Option<ThinkingSummaries>,
526
527    #[validate(range(min = 1))]
528    pub max_output_tokens: Option<u32>,
529
530    pub speech_config: Option<Vec<SpeechConfig>>,
531
532    pub image_config: Option<ImageConfig>,
533}
534
535#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
536#[serde(rename_all = "snake_case")]
537pub enum ThinkingLevel {
538    Minimal,
539    Low,
540    Medium,
541    High,
542}
543
544#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
545#[serde(rename_all = "snake_case")]
546pub enum ThinkingSummaries {
547    Auto,
548    None,
549}
550
551/// Tool choice can be a simple mode or a detailed config
552#[derive(Debug, Clone, Deserialize, Serialize)]
553#[serde(untagged)]
554pub enum ToolChoice {
555    Type(ToolChoiceType),
556    Config(ToolChoiceConfig),
557}
558
559#[skip_serializing_none]
560#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
561pub struct ToolChoiceConfig {
562    pub allowed_tools: Option<AllowedTools>,
563}
564
565#[skip_serializing_none]
566#[derive(Debug, Clone, Deserialize, Serialize)]
567pub struct SpeechConfig {
568    pub voice: Option<String>,
569    pub language: Option<String>,
570    pub speaker: Option<String>,
571}
572
573#[skip_serializing_none]
574#[derive(Debug, Clone, Deserialize, Serialize)]
575pub struct ImageConfig {
576    pub aspect_ratio: Option<AspectRatio>,
577    pub image_size: Option<ImageSize>,
578}
579
580#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
581pub enum AspectRatio {
582    #[serde(rename = "1:1")]
583    Square,
584    #[serde(rename = "2:3")]
585    Portrait2x3,
586    #[serde(rename = "3:2")]
587    Landscape3x2,
588    #[serde(rename = "3:4")]
589    Portrait3x4,
590    #[serde(rename = "4:3")]
591    Landscape4x3,
592    #[serde(rename = "4:5")]
593    Portrait4x5,
594    #[serde(rename = "5:4")]
595    Landscape5x4,
596    #[serde(rename = "9:16")]
597    Portrait9x16,
598    #[serde(rename = "16:9")]
599    Landscape16x9,
600    #[serde(rename = "21:9")]
601    UltraWide,
602}
603
604#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
605pub enum ImageSize {
606    #[serde(rename = "1K")]
607    OneK,
608    #[serde(rename = "2K")]
609    TwoK,
610    #[serde(rename = "4K")]
611    FourK,
612}
613
614/// Agent configuration
615/// See: https://ai.google.dev/api/interactions-api#CreateInteraction-deep_research
616#[derive(Debug, Clone, Deserialize, Serialize)]
617#[serde(tag = "type", rename_all = "snake_case")]
618pub enum AgentConfig {
619    /// Dynamic agent configuration
620    Dynamic {},
621    /// Deep Research agent configuration
622    #[serde(rename = "deep-research")]
623    DeepResearch {
624        /// Whether to include thought summaries ("auto" or "none")
625        #[serde(skip_serializing_if = "Option::is_none")]
626        thinking_summaries: Option<ThinkingSummaries>,
627    },
628}
629
// ============================================================================
// Input/Output Types
// ============================================================================
633
634/// Input can be Content, array of Content, array of Turn, or string
635/// See: https://ai.google.dev/api/interactions-api#request-body
636#[derive(Debug, Clone, Deserialize, Serialize)]
637#[serde(untagged)]
638pub enum InteractionsInput {
639    /// Simple text input
640    Text(String),
641    /// Single content block
642    Content(Content),
643    /// Array of content blocks
644    Contents(Vec<Content>),
645    /// Array of turns (conversation history)
646    Turns(Vec<Turn>),
647}
648
649/// A turn in a conversation with role and content
650/// See: https://ai.google.dev/api/interactions-api#Resource:Turn
651#[skip_serializing_none]
652#[derive(Debug, Clone, Deserialize, Serialize)]
653pub struct Turn {
654    /// Role: "user" or "model"
655    pub role: Option<String>,
656    /// Content can be array of Content or string
657    pub content: Option<TurnContent>,
658}
659
660/// Turn content can be array of Content or a simple string
661#[derive(Debug, Clone, Deserialize, Serialize)]
662#[serde(untagged)]
663pub enum TurnContent {
664    Contents(Vec<Content>),
665    Text(String),
666}
667
668/// Content is a polymorphic type representing different content types
669/// See: https://ai.google.dev/api/interactions-api#Resource:Content
670#[skip_serializing_none]
671#[derive(Debug, Clone, Deserialize, Serialize)]
672#[serde(tag = "type", rename_all = "snake_case")]
673pub enum Content {
674    /// Text content
675    Text {
676        text: Option<String>,
677        annotations: Option<Vec<Annotation>>,
678    },
679
680    /// Image content
681    Image {
682        data: Option<String>,
683        uri: Option<String>,
684        mime_type: Option<ImageMimeType>,
685        resolution: Option<MediaResolution>,
686    },
687
688    /// Audio content
689    Audio {
690        data: Option<String>,
691        uri: Option<String>,
692        mime_type: Option<AudioMimeType>,
693    },
694
695    /// Document content (PDF)
696    Document {
697        data: Option<String>,
698        uri: Option<String>,
699        mime_type: Option<DocumentMimeType>,
700    },
701
702    /// Video content
703    Video {
704        data: Option<String>,
705        uri: Option<String>,
706        mime_type: Option<VideoMimeType>,
707        resolution: Option<MediaResolution>,
708    },
709
710    /// Thought content
711    Thought {
712        signature: Option<String>,
713        summary: Option<Vec<ThoughtSummaryContent>>,
714    },
715
716    /// Function call content
717    FunctionCall {
718        name: String,
719        arguments: Value,
720        id: String,
721    },
722
723    /// Function result content
724    FunctionResult {
725        name: Option<String>,
726        is_error: Option<bool>,
727        result: Value,
728        call_id: String,
729    },
730
731    /// Code execution call content
732    CodeExecutionCall {
733        arguments: Option<CodeExecutionArguments>,
734        id: Option<String>,
735    },
736
737    /// Code execution result content
738    CodeExecutionResult {
739        result: Option<String>,
740        is_error: Option<bool>,
741        signature: Option<String>,
742        call_id: Option<String>,
743    },
744
745    /// URL context call content
746    UrlContextCall {
747        arguments: Option<UrlContextArguments>,
748        id: Option<String>,
749    },
750
751    /// URL context result content
752    UrlContextResult {
753        signature: Option<String>,
754        result: Option<Vec<UrlContextResultData>>,
755        is_error: Option<bool>,
756        call_id: Option<String>,
757    },
758
759    /// Google search call content
760    GoogleSearchCall {
761        arguments: Option<GoogleSearchArguments>,
762        id: Option<String>,
763    },
764
765    /// Google search result content
766    GoogleSearchResult {
767        signature: Option<String>,
768        result: Option<Vec<GoogleSearchResultData>>,
769        is_error: Option<bool>,
770        call_id: Option<String>,
771    },
772
773    /// File search call content
774    FileSearchCall { id: Option<String> },
775
776    /// File search result content
777    FileSearchResult {
778        result: Option<Vec<FileSearchResultData>>,
779    },
780
781    /// MCP server tool call content
782    McpServerToolCall {
783        name: String,
784        server_name: String,
785        arguments: Value,
786        id: String,
787    },
788
789    /// MCP server tool result content
790    McpServerToolResult {
791        name: Option<String>,
792        server_name: Option<String>,
793        result: Value,
794        call_id: String,
795    },
796}
797
798/// Content types allowed in thought summary (text or image only)
799#[skip_serializing_none]
800#[derive(Debug, Clone, Deserialize, Serialize)]
801#[serde(tag = "type", rename_all = "snake_case")]
802pub enum ThoughtSummaryContent {
803    /// Text content in thought summary
804    Text {
805        text: Option<String>,
806        annotations: Option<Vec<Annotation>>,
807    },
808    /// Image content in thought summary
809    Image {
810        data: Option<String>,
811        uri: Option<String>,
812        mime_type: Option<ImageMimeType>,
813        resolution: Option<MediaResolution>,
814    },
815}
816
817/// Annotation for text content (citations)
818#[skip_serializing_none]
819#[derive(Debug, Clone, Deserialize, Serialize)]
820pub struct Annotation {
821    /// Start of the attributed segment, measured in bytes
822    pub start_index: Option<u32>,
823    /// End of the attributed segment, exclusive
824    pub end_index: Option<u32>,
825    /// Source attributed for a portion of the text (URL, title, or other identifier)
826    pub source: Option<String>,
827}
828
829/// Arguments for URL context call
830#[skip_serializing_none]
831#[derive(Debug, Clone, Deserialize, Serialize)]
832pub struct UrlContextArguments {
833    /// The URLs to fetch
834    pub urls: Option<Vec<String>>,
835}
836
837/// Result data for URL context result
838#[skip_serializing_none]
839#[derive(Debug, Clone, Deserialize, Serialize)]
840pub struct UrlContextResultData {
841    /// The URL that was fetched
842    pub url: Option<String>,
843    /// The status of the URL retrieval
844    pub status: Option<UrlContextStatus>,
845}
846
847/// Status of URL context retrieval
848#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
849#[serde(rename_all = "snake_case")]
850pub enum UrlContextStatus {
851    Success,
852    Error,
853    Paywall,
854    Unsafe,
855}
856
857/// Arguments for Google search call
858#[skip_serializing_none]
859#[derive(Debug, Clone, Deserialize, Serialize)]
860pub struct GoogleSearchArguments {
861    /// Web search queries
862    pub queries: Option<Vec<String>>,
863}
864
865/// Result data for Google search result
866#[skip_serializing_none]
867#[derive(Debug, Clone, Deserialize, Serialize)]
868pub struct GoogleSearchResultData {
869    /// URI reference of the search result
870    pub url: Option<String>,
871    /// Title of the search result
872    pub title: Option<String>,
873    /// Web content snippet
874    pub rendered_content: Option<String>,
875}
876
877/// Result data for file search result
878#[skip_serializing_none]
879#[derive(Debug, Clone, Deserialize, Serialize)]
880pub struct FileSearchResultData {
881    /// Search result title
882    pub title: Option<String>,
883    /// Search result text
884    pub text: Option<String>,
885    /// Name of the file search store
886    pub file_search_store: Option<String>,
887}
888
889/// Arguments for code execution call
890#[skip_serializing_none]
891#[derive(Debug, Clone, Deserialize, Serialize)]
892pub struct CodeExecutionArguments {
893    /// Programming language (currently only Python is supported)
894    pub language: Option<CodeExecutionLanguage>,
895    /// The code to be executed
896    pub code: Option<String>,
897}
898
899/// Supported languages for code execution
900#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
901#[serde(rename_all = "snake_case")]
902pub enum CodeExecutionLanguage {
903    Python,
904}
905
906/// Image/video resolution options
907#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
908#[serde(rename_all = "snake_case")]
909pub enum MediaResolution {
910    Low,
911    Medium,
912    High,
913    UltraHigh,
914}
915
916/// Supported image MIME types
917#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
918pub enum ImageMimeType {
919    #[serde(rename = "image/png")]
920    Png,
921    #[serde(rename = "image/jpeg")]
922    Jpeg,
923    #[serde(rename = "image/webp")]
924    Webp,
925    #[serde(rename = "image/heic")]
926    Heic,
927    #[serde(rename = "image/heif")]
928    Heif,
929}
930
931impl ImageMimeType {
932    pub fn as_str(&self) -> &'static str {
933        match self {
934            ImageMimeType::Png => "image/png",
935            ImageMimeType::Jpeg => "image/jpeg",
936            ImageMimeType::Webp => "image/webp",
937            ImageMimeType::Heic => "image/heic",
938            ImageMimeType::Heif => "image/heif",
939        }
940    }
941}
942
943/// Supported audio MIME types
944#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
945pub enum AudioMimeType {
946    #[serde(rename = "audio/wav")]
947    Wav,
948    #[serde(rename = "audio/mp3")]
949    Mp3,
950    #[serde(rename = "audio/aiff")]
951    Aiff,
952    #[serde(rename = "audio/aac")]
953    Aac,
954    #[serde(rename = "audio/ogg")]
955    Ogg,
956    #[serde(rename = "audio/flac")]
957    Flac,
958}
959
960impl AudioMimeType {
961    pub fn as_str(&self) -> &'static str {
962        match self {
963            AudioMimeType::Wav => "audio/wav",
964            AudioMimeType::Mp3 => "audio/mp3",
965            AudioMimeType::Aiff => "audio/aiff",
966            AudioMimeType::Aac => "audio/aac",
967            AudioMimeType::Ogg => "audio/ogg",
968            AudioMimeType::Flac => "audio/flac",
969        }
970    }
971}
972
973/// Supported document MIME types
974#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
975pub enum DocumentMimeType {
976    #[serde(rename = "application/pdf")]
977    Pdf,
978}
979
980impl DocumentMimeType {
981    pub fn as_str(&self) -> &'static str {
982        match self {
983            DocumentMimeType::Pdf => "application/pdf",
984        }
985    }
986}
987
988/// Supported video MIME types
989#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
990pub enum VideoMimeType {
991    #[serde(rename = "video/mp4")]
992    Mp4,
993    #[serde(rename = "video/mpeg")]
994    Mpeg,
995    #[serde(rename = "video/mov")]
996    Mov,
997    #[serde(rename = "video/avi")]
998    Avi,
999    #[serde(rename = "video/x-flv")]
1000    Flv,
1001    #[serde(rename = "video/mpg")]
1002    Mpg,
1003    #[serde(rename = "video/webm")]
1004    Webm,
1005    #[serde(rename = "video/wmv")]
1006    Wmv,
1007    #[serde(rename = "video/3gpp")]
1008    ThreeGpp,
1009}
1010
1011impl VideoMimeType {
1012    pub fn as_str(&self) -> &'static str {
1013        match self {
1014            VideoMimeType::Mp4 => "video/mp4",
1015            VideoMimeType::Mpeg => "video/mpeg",
1016            VideoMimeType::Mov => "video/mov",
1017            VideoMimeType::Avi => "video/avi",
1018            VideoMimeType::Flv => "video/x-flv",
1019            VideoMimeType::Mpg => "video/mpg",
1020            VideoMimeType::Webm => "video/webm",
1021            VideoMimeType::Wmv => "video/wmv",
1022            VideoMimeType::ThreeGpp => "video/3gpp",
1023        }
1024    }
1025}
1026
// ============================================================================
// Status Types
// ============================================================================
1030
/// Status of an interaction.
///
/// Variants are (de)serialized in `snake_case` (`in_progress`,
/// `requires_action`, `completed`, `failed`, `cancelled`).
/// `Default::default()` yields [`InteractionsStatus::InProgress`].
#[derive(Debug, Clone, Default, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum InteractionsStatus {
    /// Wire value `in_progress`; this is the default status.
    #[default]
    InProgress,
    /// Wire value `requires_action`.
    // NOTE(review): presumably the interaction is waiting on the client
    // (e.g. tool output) - confirm against the API docs.
    RequiresAction,
    /// Wire value `completed`.
    Completed,
    /// Wire value `failed`.
    Failed,
    /// Wire value `cancelled`.
    Cancelled,
}
1041
1042// ============================================================================
1043// Usage Types
1044// ============================================================================
1045
/// Token count by modality.
///
/// Both fields are optional; `skip_serializing_none` leaves `None` fields out
/// of the serialized output.
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ModalityTokens {
    /// Modality this count applies to, if present.
    pub modality: Option<ResponseModality>,
    /// Token count for `modality`, if present.
    pub tokens: Option<u32>,
}
1053
/// Token usage counters for an interaction.
///
/// Every field is optional; `skip_serializing_none` leaves `None` fields out
/// of the serialized output. The `*_by_modality` lists hold one
/// [`ModalityTokens`] entry per modality.
// NOTE(review): the exact accounting behind each counter (e.g. whether
// `total_tokens` is the sum of the others) is not visible here - confirm
// against the upstream API reference.
#[skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct InteractionsUsage {
    /// Total input token count.
    pub total_input_tokens: Option<u32>,
    /// Input token counts split by modality.
    pub input_tokens_by_modality: Option<Vec<ModalityTokens>>,
    /// Total cached token count.
    pub total_cached_tokens: Option<u32>,
    /// Cached token counts split by modality.
    pub cached_tokens_by_modality: Option<Vec<ModalityTokens>>,
    /// Total output token count.
    pub total_output_tokens: Option<u32>,
    /// Output token counts split by modality.
    pub output_tokens_by_modality: Option<Vec<ModalityTokens>>,
    /// Total tool-use token count.
    pub total_tool_use_tokens: Option<u32>,
    /// Tool-use token counts split by modality.
    pub tool_use_tokens_by_modality: Option<Vec<ModalityTokens>>,
    /// Total thought token count (no per-modality breakdown field exists).
    pub total_thought_tokens: Option<u32>,
    /// Overall token count.
    pub total_tokens: Option<u32>,
}
1068
/// Content modality.
///
/// Variants are (de)serialized in `snake_case` (`text`, `image`, `audio`).
/// Used by [`ModalityTokens`] to label per-modality token counts.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ResponseModality {
    /// Wire value `text`.
    Text,
    /// Wire value `image`.
    Image,
    /// Wire value `audio`.
    Audio,
}
1076
/// Returns `true` when `v` is `None` or holds a string that is empty after
/// trimming surrounding whitespace.
fn is_option_blank(v: Option<&String>) -> bool {
    match v {
        Some(text) => text.trim().is_empty(),
        None => true,
    }
}
1080
1081fn validate_interactions_request(req: &InteractionsRequest) -> Result<(), ValidationError> {
1082    // Exactly one of model or agent must be provided
1083    if is_option_blank(req.model.as_ref()) && is_option_blank(req.agent.as_ref()) {
1084        return Err(ValidationError::new("model_or_agent_required"));
1085    }
1086    if !is_option_blank(req.model.as_ref()) && !is_option_blank(req.agent.as_ref()) {
1087        let mut e = ValidationError::new("model_and_agent_mutually_exclusive");
1088        e.message = Some("Cannot set both model and agent. Provide exactly one.".into());
1089        return Err(e);
1090    }
1091
1092    // response_mime_type is required when response_format is set
1093    if req.response_format.is_some() && is_option_blank(req.response_mime_type.as_ref()) {
1094        return Err(ValidationError::new("response_mime_type_required"));
1095    }
1096
1097    // background mode is required for agent interactions, and only for agents
1098    if !is_option_blank(req.agent.as_ref()) && !req.background {
1099        let mut e = ValidationError::new("agent_requires_background");
1100        e.message = Some("Agent interactions require background mode to be enabled.".into());
1101        return Err(e);
1102    }
1103    if !is_option_blank(req.model.as_ref()) && req.background {
1104        let mut e = ValidationError::new("background_requires_agent");
1105        e.message = Some("Background mode is only supported for agent interactions.".into());
1106        return Err(e);
1107    }
1108
1109    // background and stream are mutually exclusive
1110    if req.background && req.stream {
1111        let mut e = ValidationError::new("background_conflicts_with_stream");
1112        e.message = Some("Cannot set both background and stream to true.".into());
1113        return Err(e);
1114    }
1115    Ok(())
1116}
1117
1118fn validate_tools(tools: &[InteractionsTool]) -> Result<(), ValidationError> {
1119    // FileSearch tool is not supported yet
1120    if tools
1121        .iter()
1122        .any(|t| matches!(t, InteractionsTool::FileSearch { .. }))
1123    {
1124        return Err(ValidationError::new("file_search_tool_not_supported"));
1125    }
1126    Ok(())
1127}
1128
1129fn validate_input(input: &InteractionsInput) -> Result<(), ValidationError> {
1130    // Reject empty input
1131    let empty_msg = match input {
1132        InteractionsInput::Text(s) if s.trim().is_empty() => Some("Input text cannot be empty"),
1133        InteractionsInput::Content(content) if is_content_empty(content) => {
1134            Some("Input content cannot be empty")
1135        }
1136        InteractionsInput::Contents(contents) if contents.is_empty() => {
1137            Some("Input content array cannot be empty")
1138        }
1139        InteractionsInput::Contents(contents) if contents.iter().any(is_content_empty) => {
1140            Some("Input content array contains empty content items")
1141        }
1142        InteractionsInput::Turns(turns) if turns.is_empty() => {
1143            Some("Input turns array cannot be empty")
1144        }
1145        InteractionsInput::Turns(turns) if turns.iter().any(is_turn_empty) => {
1146            Some("Input turns array contains empty turn items")
1147        }
1148        _ => None,
1149    };
1150    if let Some(msg) = empty_msg {
1151        let mut e = ValidationError::new("input_cannot_be_empty");
1152        e.message = Some(msg.into());
1153        return Err(e);
1154    }
1155
1156    // Reject unsupported file search content
1157    fn has_file_search_content(content: &Content) -> bool {
1158        matches!(
1159            content,
1160            Content::FileSearchCall { .. } | Content::FileSearchResult { .. }
1161        )
1162    }
1163
1164    fn check_turn(turn: &Turn) -> bool {
1165        if let Some(content) = &turn.content {
1166            match content {
1167                TurnContent::Contents(contents) => contents.iter().any(has_file_search_content),
1168                TurnContent::Text(_) => false,
1169            }
1170        } else {
1171            false
1172        }
1173    }
1174
1175    let has_file_search = match input {
1176        InteractionsInput::Text(_) => false,
1177        InteractionsInput::Content(content) => has_file_search_content(content),
1178        InteractionsInput::Contents(contents) => contents.iter().any(has_file_search_content),
1179        InteractionsInput::Turns(turns) => turns.iter().any(check_turn),
1180    };
1181
1182    if has_file_search {
1183        return Err(ValidationError::new("file_search_content_not_supported"));
1184    }
1185    Ok(())
1186}
1187
1188fn is_content_empty(content: &Content) -> bool {
1189    match content {
1190        Content::Text { text, .. } => is_option_blank(text.as_ref()),
1191        Content::Image { data, uri, .. }
1192        | Content::Audio { data, uri, .. }
1193        | Content::Document { data, uri, .. }
1194        | Content::Video { data, uri, .. } => {
1195            is_option_blank(data.as_ref()) && is_option_blank(uri.as_ref())
1196        }
1197        Content::CodeExecutionCall { id, .. }
1198        | Content::UrlContextCall { id, .. }
1199        | Content::GoogleSearchCall { id, .. } => is_option_blank(id.as_ref()),
1200        Content::CodeExecutionResult { call_id, .. }
1201        | Content::UrlContextResult { call_id, .. }
1202        | Content::GoogleSearchResult { call_id, .. } => is_option_blank(call_id.as_ref()),
1203        _ => false,
1204    }
1205}
1206
1207fn is_turn_empty(turn: &Turn) -> bool {
1208    match &turn.content {
1209        None => true,
1210        Some(TurnContent::Text(s)) => s.trim().is_empty(),
1211        Some(TurnContent::Contents(contents)) => {
1212            contents.is_empty() || contents.iter().any(is_content_empty)
1213        }
1214    }
1215}
1216
1217fn validate_stop_sequences(seqs: &[String]) -> Result<(), ValidationError> {
1218    if seqs.len() > 5 {
1219        let mut e = ValidationError::new("too_many_stop_sequences");
1220        e.message = Some("Maximum 5 stop sequences allowed".into());
1221        return Err(e);
1222    }
1223    if seqs.iter().any(|s| s.trim().is_empty()) {
1224        let mut e = ValidationError::new("stop_sequences_cannot_be_empty");
1225        e.message = Some("Stop sequences cannot contain empty strings".into());
1226        return Err(e);
1227    }
1228    Ok(())
1229}
openai_protocol/interactions.rs

openai_protocol/
interactions.rs