1use std::collections::HashMap;
10
11use serde::{Deserialize, Serialize};
12use validator::{Validate, ValidationError};
13
14use crate::{
15 builders::RealtimeResponseBuilder,
16 common::ResponsePrompt,
17 realtime_conversation::{ConversationItemRole, RealtimeContentPart, RealtimeConversationItem},
18 realtime_session::{
19 MaxOutputTokens, OutputModality, RealtimeAudioFormats, RealtimeToolChoiceConfig,
20 RealtimeToolsConfig, Voice,
21 },
22};
23
/// A realtime response object.
///
/// Every field is optional. Because of `#[serde_with::skip_serializing_none]`,
/// fields that are `None` are left out of the serialized output instead of
/// being written as `null`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponse {
    /// Response identifier.
    pub id: Option<String>,
    /// Audio output configuration (see [`RealtimeResponseCreateAudioOutput`]).
    pub audio: Option<RealtimeResponseCreateAudioOutput>,
    /// Conversation identifier, when one is attached.
    pub conversation_id: Option<String>,
    /// Output token limit (see [`MaxOutputTokens`]).
    pub max_output_tokens: Option<MaxOutputTokens>,
    /// Free-form string key/value pairs.
    pub metadata: Option<HashMap<String, String>>,
    /// Object type tag; its only possible value serializes as
    /// `"realtime.response"`.
    pub object: Option<RealtimeResponseObject>,
    /// Conversation items carried under the `output` key.
    pub output: Option<Vec<RealtimeConversationItem>>,
    /// Output modalities listed for this response.
    pub output_modalities: Option<Vec<OutputModality>>,
    /// Status of the response (see [`ResponseStatus`]).
    pub status: Option<ResponseStatus>,
    /// Additional detail about the status (see [`RealtimeResponseStatus`]).
    pub status_details: Option<RealtimeResponseStatus>,
    /// Token usage figures (see [`RealtimeResponseUsage`]).
    pub usage: Option<RealtimeResponseUsage>,
}
44
/// Parameters for creating a realtime response.
///
/// Every field is optional, and `None` fields are omitted on serialization
/// (`#[serde_with::skip_serializing_none]`).
///
/// The derived `Validate` implementation runs the schema-level function
/// `validate_response_create_params` against the whole struct, which checks
/// the conversation items in `input`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[validate(schema(function = "validate_response_create_params"))]
pub struct RealtimeResponseCreateParams {
    /// Audio output configuration (see [`RealtimeResponseCreateAudioOutput`]).
    pub audio: Option<RealtimeResponseCreateAudioOutput>,
    /// Conversation selector: either a mode or an id string
    /// (see [`ResponseConversation`]).
    pub conversation: Option<ResponseConversation>,
    /// Input conversation items; these are the items checked by the schema
    /// validator.
    pub input: Option<Vec<RealtimeConversationItem>>,
    /// Instructions text.
    pub instructions: Option<String>,
    /// Output token limit (see [`MaxOutputTokens`]).
    pub max_output_tokens: Option<MaxOutputTokens>,
    /// Free-form string key/value pairs.
    pub metadata: Option<HashMap<String, String>>,
    /// Requested output modalities.
    pub output_modalities: Option<Vec<OutputModality>>,
    /// Prompt reference (see [`ResponsePrompt`]).
    pub prompt: Option<ResponsePrompt>,
    /// Tool choice configuration.
    pub tool_choice: Option<RealtimeToolChoiceConfig>,
    /// Tool configurations.
    pub tools: Option<Vec<RealtimeToolsConfig>>,
}
65
/// Object type tag used in [`RealtimeResponse::object`].
///
/// There is a single variant, which serializes to and deserializes from the
/// string `"realtime.response"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum RealtimeResponseObject {
    /// Wire value: `"realtime.response"`.
    #[serde(rename = "realtime.response")]
    RealtimeResponse,
}
76
/// Status values for [`RealtimeResponse::status`].
///
/// Variants are (de)serialized in `snake_case`: `"completed"`, `"cancelled"`,
/// `"failed"`, `"incomplete"` and `"in_progress"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ResponseStatus {
    /// Wire value: `"completed"`.
    Completed,
    /// Wire value: `"cancelled"`.
    Cancelled,
    /// Wire value: `"failed"`.
    Failed,
    /// Wire value: `"incomplete"`.
    Incomplete,
    /// Wire value: `"in_progress"`.
    InProgress,
}
87
/// Values for the `type` field of [`RealtimeResponseStatus`].
///
/// Variants are (de)serialized in `snake_case`: `"completed"`, `"cancelled"`,
/// `"failed"` and `"incomplete"`. Unlike [`ResponseStatus`], there is no
/// in-progress variant.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum StatusDetailsType {
    /// Wire value: `"completed"`.
    Completed,
    /// Wire value: `"cancelled"`.
    Cancelled,
    /// Wire value: `"failed"`.
    Failed,
    /// Wire value: `"incomplete"`.
    Incomplete,
}
97
/// Values for the `reason` field of [`RealtimeResponseStatus`].
///
/// Variants are (de)serialized in `snake_case`: `"turn_detected"`,
/// `"client_cancelled"`, `"max_output_tokens"` and `"content_filter"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum StatusDetailsReason {
    /// Wire value: `"turn_detected"`.
    TurnDetected,
    /// Wire value: `"client_cancelled"`.
    ClientCancelled,
    /// Wire value: `"max_output_tokens"`.
    MaxOutputTokens,
    /// Wire value: `"content_filter"`.
    ContentFilter,
}
107
/// Error payload used in the `error` field of [`RealtimeResponseStatus`].
///
/// Both fields are optional free-form strings; `None` fields are omitted on
/// serialization (`#[serde_with::skip_serializing_none]`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResponseStatusError {
    /// Error code string.
    pub code: Option<String>,
    /// Error type string; appears under the key `type` on the wire (`type` is
    /// a Rust keyword, hence the raw identifier).
    #[serde(rename = "type")]
    pub r#type: Option<String>,
}
116
/// Status detail payload used in [`RealtimeResponse::status_details`].
///
/// All fields are optional; `None` fields are omitted on serialization
/// (`#[serde_with::skip_serializing_none]`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseStatus {
    /// Error information (see [`ResponseStatusError`]).
    pub error: Option<ResponseStatusError>,
    /// Reason value (see [`StatusDetailsReason`]).
    pub reason: Option<StatusDetailsReason>,
    /// Status detail kind; appears under the key `type` on the wire
    /// (see [`StatusDetailsType`]).
    #[serde(rename = "type")]
    pub r#type: Option<StatusDetailsType>,
}
126
/// Audio output settings, used as the `output` field of
/// [`RealtimeResponseCreateAudioOutput`].
///
/// Both fields are optional; `None` fields are omitted on serialization
/// (`#[serde_with::skip_serializing_none]`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResponseAudioOutputConfig {
    /// Audio format (see [`RealtimeAudioFormats`]).
    pub format: Option<RealtimeAudioFormats>,
    /// Voice selection (see [`Voice`]).
    pub voice: Option<Voice>,
}
138
/// Wrapper used for the `audio` field of [`RealtimeResponse`] and
/// [`RealtimeResponseCreateParams`].
///
/// It nests the actual settings under an `output` key; when `output` is
/// `None` the key is omitted on serialization
/// (`#[serde_with::skip_serializing_none]`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseCreateAudioOutput {
    /// Output audio settings (see [`ResponseAudioOutputConfig`]).
    pub output: Option<ResponseAudioOutputConfig>,
}
145
/// Per-kind breakdown used in
/// [`RealtimeResponseUsageInputTokenDetails::cached_tokens_details`].
///
/// Each count is an optional `u64`; `None` fields are omitted on
/// serialization (`#[serde_with::skip_serializing_none]`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedTokensDetails {
    /// Audio token count.
    pub audio_tokens: Option<u64>,
    /// Image token count.
    pub image_tokens: Option<u64>,
    /// Text token count.
    pub text_tokens: Option<u64>,
}
158
/// Input-side breakdown used in
/// [`RealtimeResponseUsage::input_token_details`].
///
/// All fields are optional; `None` fields are omitted on serialization
/// (`#[serde_with::skip_serializing_none]`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseUsageInputTokenDetails {
    /// Audio token count.
    pub audio_tokens: Option<u64>,
    /// Cached token count.
    pub cached_tokens: Option<u64>,
    /// Per-kind breakdown of cached tokens (see [`CachedTokensDetails`]).
    pub cached_tokens_details: Option<CachedTokensDetails>,
    /// Image token count.
    pub image_tokens: Option<u64>,
    /// Text token count.
    pub text_tokens: Option<u64>,
}
169
/// Output-side breakdown used in
/// [`RealtimeResponseUsage::output_token_details`].
///
/// Both counts are optional; `None` fields are omitted on serialization
/// (`#[serde_with::skip_serializing_none]`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseUsageOutputTokenDetails {
    /// Audio token count.
    pub audio_tokens: Option<u64>,
    /// Text token count.
    pub text_tokens: Option<u64>,
}
177
/// Token usage figures used in [`RealtimeResponse::usage`].
///
/// All fields are optional; `None` fields are omitted on serialization
/// (`#[serde_with::skip_serializing_none]`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseUsage {
    /// Input token breakdown
    /// (see [`RealtimeResponseUsageInputTokenDetails`]).
    pub input_token_details: Option<RealtimeResponseUsageInputTokenDetails>,
    /// Input token count.
    pub input_tokens: Option<u64>,
    /// Output token breakdown
    /// (see [`RealtimeResponseUsageOutputTokenDetails`]).
    pub output_token_details: Option<RealtimeResponseUsageOutputTokenDetails>,
    /// Output token count.
    pub output_tokens: Option<u64>,
    /// Total token count.
    // NOTE(review): presumably input_tokens + output_tokens; nothing in this
    // file enforces that relationship.
    pub total_tokens: Option<u64>,
}
188
/// Value of [`RealtimeResponseCreateParams::conversation`]: either a mode
/// keyword or an arbitrary string.
///
/// The enum is `#[serde(untagged)]`, so both variants appear on the wire as a
/// plain string. On deserialization serde tries the variants in declaration
/// order: `"auto"` and `"none"` become [`ResponseConversation::Mode`]; any
/// other string falls through to [`ResponseConversation::Id`]. Do not reorder
/// the variants: with `Id` first, every string would match it and `Mode`
/// would never be produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ResponseConversation {
    /// One of the mode keywords (see [`ResponseConversationMode`]).
    Mode(ResponseConversationMode),
    /// Any string that is not a mode keyword.
    Id(String),
}
203
204#[derive(Debug, Clone, Serialize, Deserialize, Default)]
207#[serde(rename_all = "snake_case")]
208pub enum ResponseConversationMode {
209 #[default]
210 Auto,
211 None,
212}
213
214fn validate_response_create_params(
216 request: &RealtimeResponseCreateParams,
217) -> Result<(), ValidationError> {
218 if let Some(items) = &request.input {
220 for item in items {
221 validate_conversation_item(item)?;
222 }
223 }
224 Ok(())
225}
226
227fn validate_conversation_item(item: &RealtimeConversationItem) -> Result<(), ValidationError> {
235 let (role, content) = match item {
236 RealtimeConversationItem::Message { role, content, .. } => (role, content),
237 _ => return Ok(()),
238 };
239
240 for (i, part) in content.iter().enumerate() {
241 let allowed = match role {
242 ConversationItemRole::System => {
243 matches!(part, RealtimeContentPart::InputText { .. })
244 }
245 ConversationItemRole::User => matches!(
246 part,
247 RealtimeContentPart::InputText { .. }
248 | RealtimeContentPart::InputAudio { .. }
249 | RealtimeContentPart::InputImage { .. }
250 ),
251 ConversationItemRole::Assistant => matches!(
252 part,
253 RealtimeContentPart::OutputText { .. } | RealtimeContentPart::OutputAudio { .. }
254 ),
255 };
256
257 if !allowed {
258 let mut err = ValidationError::new("invalid_content_part");
259 err.message = Some(
260 format!(
261 "content[{}]: {:?} role does not allow \"{}\" content parts",
262 i,
263 role,
264 part.type_name()
265 )
266 .into(),
267 );
268 return Err(err);
269 }
270 }
271
272 Ok(())
273}
274
impl RealtimeResponse {
    /// Starts building a [`RealtimeResponse`].
    ///
    /// `id` is forwarded unchanged to `RealtimeResponseBuilder::new`; anything
    /// convertible into a `String` is accepted.
    pub fn builder(id: impl Into<String>) -> RealtimeResponseBuilder {
        RealtimeResponseBuilder::new(id)
    }
}