Skip to main content

openai_protocol/
realtime_response.rs

1// OpenAI Realtime Conversation API types
2// https://platform.openai.com/docs/api-reference/realtime
3//
4// Session configuration and audio types live in `realtime_session`.
5// Event type constants live in `event_types`.
6// This module covers the realtime response
7// object, usage, errors, rate limits.
8
9use std::collections::HashMap;
10
11use serde::{Deserialize, Serialize};
12use validator::{Validate, ValidationError};
13
14use crate::{
15    builders::RealtimeResponseBuilder,
16    common::ResponsePrompt,
17    realtime_conversation::{ConversationItemRole, RealtimeContentPart, RealtimeConversationItem},
18    realtime_session::{
19        MaxOutputTokens, OutputModality, RealtimeAudioFormats, RealtimeToolChoiceConfig,
20        RealtimeToolsConfig, Voice,
21    },
22};
23
24// ============================================================================
25// Realtime Response
26// ============================================================================
27
28/// A response object in the Realtime API.
29#[serde_with::skip_serializing_none]
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct RealtimeResponse {
32    pub id: Option<String>,
33    pub audio: Option<RealtimeResponseCreateAudioOutput>,
34    pub conversation_id: Option<String>,
35    pub max_output_tokens: Option<MaxOutputTokens>,
36    pub metadata: Option<HashMap<String, String>>,
37    pub object: Option<RealtimeResponseObject>,
38    pub output: Option<Vec<RealtimeConversationItem>>,
39    pub output_modalities: Option<Vec<OutputModality>>,
40    pub status: Option<ResponseStatus>,
41    pub status_details: Option<RealtimeResponseStatus>,
42    pub usage: Option<RealtimeResponseUsage>,
43}
44
45// ============================================================================
46// Realtime Response Create Params
47// ============================================================================
48
49/// Parameters for creating a realtime response.
50#[serde_with::skip_serializing_none]
51#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
52#[validate(schema(function = "validate_response_create_params"))]
53pub struct RealtimeResponseCreateParams {
54    pub audio: Option<RealtimeResponseCreateAudioOutput>,
55    pub conversation: Option<ResponseConversation>,
56    pub input: Option<Vec<RealtimeConversationItem>>,
57    pub instructions: Option<String>,
58    pub max_output_tokens: Option<MaxOutputTokens>,
59    pub metadata: Option<HashMap<String, String>>,
60    pub output_modalities: Option<Vec<OutputModality>>,
61    pub prompt: Option<ResponsePrompt>,
62    pub tool_choice: Option<RealtimeToolChoiceConfig>,
63    pub tools: Option<Vec<RealtimeToolsConfig>>,
64}
65
66// ============================================================================
67// Response Status
68// ============================================================================
69
70/// Object type for realtime responses. Always `"realtime.response"`.
71#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
72pub enum RealtimeResponseObject {
73    #[serde(rename = "realtime.response")]
74    RealtimeResponse,
75}
76
77/// Status of a realtime response.
78#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
79#[serde(rename_all = "snake_case")]
80pub enum ResponseStatus {
81    Completed,
82    Cancelled,
83    Failed,
84    Incomplete,
85    InProgress,
86}
87
88/// The type within status details.
89#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
90#[serde(rename_all = "snake_case")]
91pub enum StatusDetailsType {
92    Completed,
93    Cancelled,
94    Failed,
95    Incomplete,
96}
97
98/// Reason the response did not complete.
99#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
100#[serde(rename_all = "snake_case")]
101pub enum StatusDetailsReason {
102    TurnDetected,
103    ClientCancelled,
104    MaxOutputTokens,
105    ContentFilter,
106}
107
108/// Error that caused the response to fail.
109#[serde_with::skip_serializing_none]
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct ResponseStatusError {
112    pub code: Option<String>,
113    #[serde(rename = "type")]
114    pub r#type: Option<String>,
115}
116
117/// Additional details about the response status.
118#[serde_with::skip_serializing_none]
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct RealtimeResponseStatus {
121    pub error: Option<ResponseStatusError>,
122    pub reason: Option<StatusDetailsReason>,
123    #[serde(rename = "type")]
124    pub r#type: Option<StatusDetailsType>,
125}
126
127// ============================================================================
128// Response Audio Configuration
129// ============================================================================
130
131/// Audio output configuration for a response.
132#[serde_with::skip_serializing_none]
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct ResponseAudioOutputConfig {
135    pub format: Option<RealtimeAudioFormats>,
136    pub voice: Option<Voice>,
137}
138
139/// Audio configuration for a response (output only).
140#[serde_with::skip_serializing_none]
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct RealtimeResponseCreateAudioOutput {
143    pub output: Option<ResponseAudioOutputConfig>,
144}
145
146// ============================================================================
147// Usage
148// ============================================================================
149
150/// Breakdown of cached token usage by modality.
151#[serde_with::skip_serializing_none]
152#[derive(Debug, Clone, Serialize, Deserialize)]
153pub struct CachedTokensDetails {
154    pub audio_tokens: Option<u64>,
155    pub image_tokens: Option<u64>,
156    pub text_tokens: Option<u64>,
157}
158
159/// Input token usage details.
160#[serde_with::skip_serializing_none]
161#[derive(Debug, Clone, Serialize, Deserialize)]
162pub struct RealtimeResponseUsageInputTokenDetails {
163    pub audio_tokens: Option<u64>,
164    pub cached_tokens: Option<u64>,
165    pub cached_tokens_details: Option<CachedTokensDetails>,
166    pub image_tokens: Option<u64>,
167    pub text_tokens: Option<u64>,
168}
169
170/// Output token usage details.
171#[serde_with::skip_serializing_none]
172#[derive(Debug, Clone, Serialize, Deserialize)]
173pub struct RealtimeResponseUsageOutputTokenDetails {
174    pub audio_tokens: Option<u64>,
175    pub text_tokens: Option<u64>,
176}
177
178/// Token usage for a realtime response.
179#[serde_with::skip_serializing_none]
180#[derive(Debug, Clone, Serialize, Deserialize)]
181pub struct RealtimeResponseUsage {
182    pub input_token_details: Option<RealtimeResponseUsageInputTokenDetails>,
183    pub input_tokens: Option<u64>,
184    pub output_token_details: Option<RealtimeResponseUsageOutputTokenDetails>,
185    pub output_tokens: Option<u64>,
186    pub total_tokens: Option<u64>,
187}
188
189// ============================================================================
190// Response Conversation
191// ============================================================================
192
193/// `"auto"`, `"none"`, or a conversation ID string.
194///
195/// Variant order matters for `#[serde(untagged)]`: serde tries `Mode` first.
196/// `"auto"` and `"none"` match `Mode`; any other string falls through to `Id`.
197#[derive(Debug, Clone, Serialize, Deserialize)]
198#[serde(untagged)]
199pub enum ResponseConversation {
200    Mode(ResponseConversationMode),
201    Id(String),
202}
203
204/// Controls which conversation the response is added to.
205/// `auto` is the default.
206#[derive(Debug, Clone, Serialize, Deserialize, Default)]
207#[serde(rename_all = "snake_case")]
208pub enum ResponseConversationMode {
209    #[default]
210    Auto,
211    None,
212}
213
214/// Schema-level validation
215fn validate_response_create_params(
216    request: &RealtimeResponseCreateParams,
217) -> Result<(), ValidationError> {
218    // validate role→content-part constraints per the OpenAI spec
219    if let Some(items) = &request.input {
220        for item in items {
221            validate_conversation_item(item)?;
222        }
223    }
224    Ok(())
225}
226
227/// Validates role→content-part constraints per the OpenAI spec.
228///
229/// - `system`    → `input_text` only
230/// - `user`      → `input_text`, `input_audio`, `input_image`
231/// - `assistant` → `output_text`, `output_audio`
232///
233/// Non-`Message` variants are always valid.
234fn validate_conversation_item(item: &RealtimeConversationItem) -> Result<(), ValidationError> {
235    let (role, content) = match item {
236        RealtimeConversationItem::Message { role, content, .. } => (role, content),
237        _ => return Ok(()),
238    };
239
240    for (i, part) in content.iter().enumerate() {
241        let allowed = match role {
242            ConversationItemRole::System => {
243                matches!(part, RealtimeContentPart::InputText { .. })
244            }
245            ConversationItemRole::User => matches!(
246                part,
247                RealtimeContentPart::InputText { .. }
248                    | RealtimeContentPart::InputAudio { .. }
249                    | RealtimeContentPart::InputImage { .. }
250            ),
251            ConversationItemRole::Assistant => matches!(
252                part,
253                RealtimeContentPart::OutputText { .. } | RealtimeContentPart::OutputAudio { .. }
254            ),
255        };
256
257        if !allowed {
258            let mut err = ValidationError::new("invalid_content_part");
259            err.message = Some(
260                format!(
261                    "content[{}]: {:?} role does not allow \"{}\" content parts",
262                    i,
263                    role,
264                    part.type_name()
265                )
266                .into(),
267            );
268            return Err(err);
269        }
270    }
271
272    Ok(())
273}
274
275impl RealtimeResponse {
276    /// Create a builder for constructing a `RealtimeResponse`.
277    pub fn builder(id: impl Into<String>) -> RealtimeResponseBuilder {
278        RealtimeResponseBuilder::new(id)
279    }
280}