Skip to main content

openai_protocol/
realtime_events.rs

1// OpenAI Realtime API wire-format event types
2// https://platform.openai.com/docs/api-reference/realtime
3//
4// This module defines the serializable/deserializable event structures
5// for both client-to-server and server-to-client messages sent over
6// WebSocket, WebRTC, or SIP connections.
7//
8// Session configuration types live in `realtime_session`.
9// Conversation item types live in `realtime_conversation`.
10// Response and usage types live in `realtime_response`.
11// Event type string constants live in `event_types`.
12
13use serde::{Deserialize, Serialize};
14
15use crate::{
16    event_types::{RealtimeClientEvent, RealtimeServerEvent},
17    realtime_conversation::RealtimeConversationItem,
18    realtime_response::{RealtimeResponse, RealtimeResponseCreateParams},
19    realtime_session::{RealtimeSessionCreateRequest, RealtimeTranscriptionSessionCreateRequest},
20};
21
22// ============================================================================
23// Client Events
24// ============================================================================
25
/// A client-to-server event in the OpenAI Realtime API.
///
/// Sent by the client over WebSocket, WebRTC, or SIP connections.
/// Discriminated by the `type` field in the JSON wire format
/// (`#[serde(tag = "type")]`); each variant's `rename` is its wire name.
///
/// `#[serde_with::skip_serializing_none]` is applied, so `Option` fields that
/// are `None` (for example an unset `event_id`) are omitted from the
/// serialized JSON instead of being written as `null`.
///
/// Large payloads (`SessionConfig` 624 B, `RealtimeResponseCreateParams` 384 B) are
/// `Box`-ed so the enum stays ≈224 bytes instead of ≈648.
///
/// NOTE(review): `SessionConfig` as declared in this file already boxes both
/// of its variants, so the 624 B figure presumably describes the unboxed
/// session request rather than `SessionConfig` itself — confirm and refresh
/// the quoted sizes.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ClientEvent {
    // ---- Session ----
    /// Update the session configuration.
    #[serde(rename = "session.update")]
    SessionUpdate {
        /// New session configuration (realtime or transcription).
        session: Box<SessionConfig>,
        /// Optional event ID; omitted from the JSON when `None`.
        event_id: Option<String>,
    },

    // ---- Conversation items ----
    /// Add a new item to the conversation.
    #[serde(rename = "conversation.item.create")]
    ConversationItemCreate {
        item: RealtimeConversationItem,
        event_id: Option<String>,
        // NOTE(review): presumably the ID of the item after which the new
        // item is inserted — confirm against the API reference.
        previous_item_id: Option<String>,
    },

    /// Remove an item from the conversation history.
    #[serde(rename = "conversation.item.delete")]
    ConversationItemDelete {
        item_id: String,
        event_id: Option<String>,
    },

    /// Retrieve the server's representation of a conversation item.
    #[serde(rename = "conversation.item.retrieve")]
    ConversationItemRetrieve {
        item_id: String,
        event_id: Option<String>,
    },

    /// Truncate a previous assistant message's audio.
    #[serde(rename = "conversation.item.truncate")]
    ConversationItemTruncate {
        audio_end_ms: u32,
        content_index: u32,
        item_id: String,
        event_id: Option<String>,
    },

    // ---- Input audio buffer ----
    /// Append audio bytes to the input audio buffer.
    ///
    /// WARNING: `audio` contains a base64 audio blob that can be very large.
    /// Avoid logging this variant with `Debug` in production; prefer
    /// `event_type()` for structured logging.
    #[serde(rename = "input_audio_buffer.append")]
    InputAudioBufferAppend {
        audio: String,
        event_id: Option<String>,
    },

    /// Clear the input audio buffer.
    #[serde(rename = "input_audio_buffer.clear")]
    InputAudioBufferClear { event_id: Option<String> },

    /// Commit the input audio buffer as a user message.
    #[serde(rename = "input_audio_buffer.commit")]
    InputAudioBufferCommit { event_id: Option<String> },

    // ---- Output audio buffer (WebRTC/SIP only) ----
    /// Cut off the current audio response.
    #[serde(rename = "output_audio_buffer.clear")]
    OutputAudioBufferClear { event_id: Option<String> },

    // ---- Response ----
    /// Cancel an in-progress response.
    #[serde(rename = "response.cancel")]
    ResponseCancel {
        event_id: Option<String>,
        response_id: Option<String>,
    },

    /// Trigger model inference to create a response.
    #[serde(rename = "response.create")]
    ResponseCreate {
        event_id: Option<String>,
        /// Optional response parameters; boxed to keep the enum small.
        response: Option<Box<RealtimeResponseCreateParams>>,
    },

    // ---- Unknown ----
    /// Unrecognized event type. Serde automatically deserializes any
    /// unrecognized `type` value into this variant (no data preserved).
    /// For proxy use, forward the raw frame instead of re-serializing:
    /// serializing this variant writes `"type": "Unknown"` and nothing else,
    /// so neither the original type string nor its payload can be recovered.
    #[serde(other)]
    Unknown,
}
124
125impl ClientEvent {
126    /// Returns the event type string (e.g. `"session.update"`).
127    ///
128    /// For unknown events, returns `"unknown"`.
129    pub fn event_type(&self) -> &str {
130        self.to_event_type()
131            .map(|e| e.as_str())
132            .unwrap_or("unknown")
133    }
134
135    /// Maps this event to its corresponding `RealtimeClientEvent` constant.
136    ///
137    /// Returns `None` for `Unknown` events.
138    pub fn to_event_type(&self) -> Option<RealtimeClientEvent> {
139        match self {
140            ClientEvent::SessionUpdate { .. } => Some(RealtimeClientEvent::SessionUpdate),
141            ClientEvent::ConversationItemCreate { .. } => {
142                Some(RealtimeClientEvent::ConversationItemCreate)
143            }
144            ClientEvent::ConversationItemDelete { .. } => {
145                Some(RealtimeClientEvent::ConversationItemDelete)
146            }
147            ClientEvent::ConversationItemRetrieve { .. } => {
148                Some(RealtimeClientEvent::ConversationItemRetrieve)
149            }
150            ClientEvent::ConversationItemTruncate { .. } => {
151                Some(RealtimeClientEvent::ConversationItemTruncate)
152            }
153            ClientEvent::InputAudioBufferAppend { .. } => {
154                Some(RealtimeClientEvent::InputAudioBufferAppend)
155            }
156            ClientEvent::InputAudioBufferClear { .. } => {
157                Some(RealtimeClientEvent::InputAudioBufferClear)
158            }
159            ClientEvent::InputAudioBufferCommit { .. } => {
160                Some(RealtimeClientEvent::InputAudioBufferCommit)
161            }
162            ClientEvent::OutputAudioBufferClear { .. } => {
163                Some(RealtimeClientEvent::OutputAudioBufferClear)
164            }
165            ClientEvent::ResponseCancel { .. } => Some(RealtimeClientEvent::ResponseCancel),
166            ClientEvent::ResponseCreate { .. } => Some(RealtimeClientEvent::ResponseCreate),
167            ClientEvent::Unknown => None,
168        }
169    }
170}
171
172// ============================================================================
173// Server Events
174// ============================================================================
175
/// A server-to-client event in the OpenAI Realtime API.
///
/// Sent by the server over WebSocket, WebRTC, or SIP connections.
/// Discriminated by the `type` field in the JSON wire format
/// (`#[serde(tag = "type")]`); each variant's `rename` is its wire name.
///
/// `#[serde_with::skip_serializing_none]` is applied, so `Option` fields that
/// are `None` are omitted from the serialized JSON instead of being written
/// as `null`.
///
/// Large payloads (`SessionConfig` 624 B, `RealtimeResponse` 352 B) are
/// `Box`-ed so the enum stays ≈232 bytes instead of ≈656.
///
/// NOTE(review): `SessionConfig` as declared in this file already boxes both
/// of its variants, so the 624 B figure presumably describes the unboxed
/// session request rather than `SessionConfig` itself — confirm and refresh
/// the quoted sizes.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ServerEvent {
    // ---- Session events ----
    /// Emitted when a new connection is established with the default session config.
    #[serde(rename = "session.created")]
    SessionCreated {
        event_id: String,
        session: Box<SessionConfig>,
    },

    /// Emitted after a successful `session.update`.
    #[serde(rename = "session.updated")]
    SessionUpdated {
        event_id: String,
        session: Box<SessionConfig>,
    },

    // ---- Conversation events ----
    /// Emitted when a conversation is created (right after session creation).
    #[serde(rename = "conversation.created")]
    ConversationCreated {
        conversation: Conversation,
        event_id: String,
    },

    /// Emitted when a conversation item is created (legacy event).
    #[serde(rename = "conversation.item.created")]
    ConversationItemCreated {
        event_id: String,
        item: RealtimeConversationItem,
        previous_item_id: Option<String>,
    },

    /// Emitted when an item is added to the default conversation.
    #[serde(rename = "conversation.item.added")]
    ConversationItemAdded {
        event_id: String,
        item: RealtimeConversationItem,
        previous_item_id: Option<String>,
    },

    /// Emitted when a conversation item is finalized.
    #[serde(rename = "conversation.item.done")]
    ConversationItemDone {
        event_id: String,
        item: RealtimeConversationItem,
        previous_item_id: Option<String>,
    },

    /// Emitted when a conversation item is deleted.
    #[serde(rename = "conversation.item.deleted")]
    ConversationItemDeleted { event_id: String, item_id: String },

    /// Emitted in response to `conversation.item.retrieve`.
    #[serde(rename = "conversation.item.retrieved")]
    ConversationItemRetrieved {
        event_id: String,
        item: RealtimeConversationItem,
    },

    /// Emitted when an assistant audio message item is truncated.
    #[serde(rename = "conversation.item.truncated")]
    ConversationItemTruncated {
        audio_end_ms: u32,
        content_index: u32,
        event_id: String,
        item_id: String,
    },

    // ---- Input audio transcription events ----
    /// Emitted when input audio transcription completes.
    #[serde(rename = "conversation.item.input_audio_transcription.completed")]
    InputAudioTranscriptionCompleted {
        content_index: u32,
        event_id: String,
        item_id: String,
        transcript: String,
        /// Token- or duration-based usage; required by this type, so a frame
        /// without `usage` fails to deserialize.
        usage: TranscriptionUsage,
        logprobs: Option<Vec<LogProbProperties>>,
    },

    /// Emitted with incremental transcription results.
    #[serde(rename = "conversation.item.input_audio_transcription.delta")]
    InputAudioTranscriptionDelta {
        event_id: String,
        item_id: String,
        content_index: Option<u32>,
        delta: Option<String>,
        logprobs: Option<Vec<LogProbProperties>>,
    },

    /// Emitted when input audio transcription fails.
    #[serde(rename = "conversation.item.input_audio_transcription.failed")]
    InputAudioTranscriptionFailed {
        content_index: u32,
        error: TranscriptionError,
        event_id: String,
        item_id: String,
    },

    /// Emitted when an input audio transcription segment is identified
    /// (used with diarization models).
    #[serde(rename = "conversation.item.input_audio_transcription.segment")]
    InputAudioTranscriptionSegment {
        id: String,
        content_index: u32,
        // NOTE(review): `start`/`end` are presumably segment offsets in
        // seconds — confirm against the API reference.
        end: f32,
        event_id: String,
        item_id: String,
        speaker: String,
        start: f32,
        text: String,
    },

    // ---- Input audio buffer events ----
    /// Emitted when the input audio buffer is cleared.
    #[serde(rename = "input_audio_buffer.cleared")]
    InputAudioBufferCleared { event_id: String },

    /// Emitted when the input audio buffer is committed.
    #[serde(rename = "input_audio_buffer.committed")]
    InputAudioBufferCommitted {
        event_id: String,
        item_id: String,
        previous_item_id: Option<String>,
    },

    /// Emitted when speech is detected in the audio buffer (server VAD mode).
    #[serde(rename = "input_audio_buffer.speech_started")]
    InputAudioBufferSpeechStarted {
        audio_start_ms: u32,
        event_id: String,
        item_id: String,
    },

    /// Emitted when the end of speech is detected (server VAD mode).
    #[serde(rename = "input_audio_buffer.speech_stopped")]
    InputAudioBufferSpeechStopped {
        audio_end_ms: u32,
        event_id: String,
        item_id: String,
    },

    /// Emitted when the VAD idle timeout triggers.
    #[serde(rename = "input_audio_buffer.timeout_triggered")]
    InputAudioBufferTimeoutTriggered {
        audio_end_ms: u32,
        audio_start_ms: u32,
        event_id: String,
        item_id: String,
    },

    /// **SIP Only:** Emitted when a DTMF keypad event is received.
    ///
    /// NOTE: This is the only server event without an `event_id` field per the
    /// OpenAI spec. Downstream code that generically extracts `event_id` from
    /// all server events must handle this variant as a special case.
    #[serde(rename = "input_audio_buffer.dtmf_event_received")]
    InputAudioBufferDtmfEventReceived { event: String, received_at: i64 },

    // ---- Output audio buffer events (WebRTC/SIP only) ----
    /// Emitted when the server begins streaming audio to the client.
    #[serde(rename = "output_audio_buffer.started")]
    OutputAudioBufferStarted {
        event_id: String,
        response_id: String,
    },

    /// Emitted when the output audio buffer has been completely drained.
    #[serde(rename = "output_audio_buffer.stopped")]
    OutputAudioBufferStopped {
        event_id: String,
        response_id: String,
    },

    /// Emitted when the output audio buffer is cleared (user interrupt or
    /// explicit `output_audio_buffer.clear`).
    #[serde(rename = "output_audio_buffer.cleared")]
    OutputAudioBufferCleared {
        event_id: String,
        response_id: String,
    },

    // ---- Response lifecycle events ----
    /// Emitted when a new response is created (status `in_progress`).
    #[serde(rename = "response.created")]
    ResponseCreated {
        event_id: String,
        response: Box<RealtimeResponse>,
    },

    /// Emitted when a response is done streaming.
    #[serde(rename = "response.done")]
    ResponseDone {
        event_id: String,
        response: Box<RealtimeResponse>,
    },

    // ---- Response output item events ----
    /// Emitted when a new output item is created during response generation.
    #[serde(rename = "response.output_item.added")]
    ResponseOutputItemAdded {
        event_id: String,
        item: RealtimeConversationItem,
        output_index: u32,
        response_id: String,
    },

    /// Emitted when an output item is done streaming.
    #[serde(rename = "response.output_item.done")]
    ResponseOutputItemDone {
        event_id: String,
        item: RealtimeConversationItem,
        output_index: u32,
        response_id: String,
    },

    // ---- Response content part events ----
    /// Emitted when a new content part is added to an assistant message.
    #[serde(rename = "response.content_part.added")]
    ResponseContentPartAdded {
        content_index: u32,
        event_id: String,
        item_id: String,
        output_index: u32,
        part: ResponseContentPart,
        response_id: String,
    },

    /// Emitted when a content part is done streaming.
    #[serde(rename = "response.content_part.done")]
    ResponseContentPartDone {
        content_index: u32,
        event_id: String,
        item_id: String,
        output_index: u32,
        part: ResponseContentPart,
        response_id: String,
    },

    // ---- Response text events ----
    /// Emitted when the text of an output_text content part is updated.
    #[serde(rename = "response.output_text.delta")]
    ResponseOutputTextDelta {
        content_index: u32,
        delta: String,
        event_id: String,
        item_id: String,
        output_index: u32,
        response_id: String,
    },

    /// Emitted when an output_text content part is done streaming.
    #[serde(rename = "response.output_text.done")]
    ResponseOutputTextDone {
        content_index: u32,
        event_id: String,
        item_id: String,
        output_index: u32,
        response_id: String,
        text: String,
    },

    // ---- Response audio events ----
    /// Emitted when model-generated audio is updated.
    ///
    /// WARNING: `delta` contains a base64 audio chunk. Avoid logging this
    /// variant with `Debug` in production; prefer `event_type()`.
    #[serde(rename = "response.output_audio.delta")]
    ResponseOutputAudioDelta {
        content_index: u32,
        delta: String,
        event_id: String,
        item_id: String,
        output_index: u32,
        response_id: String,
    },

    /// Emitted when model-generated audio is done.
    #[serde(rename = "response.output_audio.done")]
    ResponseOutputAudioDone {
        content_index: u32,
        event_id: String,
        item_id: String,
        output_index: u32,
        response_id: String,
    },

    // ---- Response audio transcript events ----
    /// Emitted when the transcription of audio output is updated.
    #[serde(rename = "response.output_audio_transcript.delta")]
    ResponseOutputAudioTranscriptDelta {
        content_index: u32,
        delta: String,
        event_id: String,
        item_id: String,
        output_index: u32,
        response_id: String,
    },

    /// Emitted when the transcription of audio output is done.
    #[serde(rename = "response.output_audio_transcript.done")]
    ResponseOutputAudioTranscriptDone {
        content_index: u32,
        event_id: String,
        item_id: String,
        output_index: u32,
        response_id: String,
        transcript: String,
    },

    // ---- Response function call events ----
    /// Emitted when function call arguments are updated.
    #[serde(rename = "response.function_call_arguments.delta")]
    ResponseFunctionCallArgumentsDelta {
        call_id: String,
        delta: String,
        event_id: String,
        item_id: String,
        output_index: u32,
        response_id: String,
    },

    /// Emitted when function call arguments are done streaming.
    ///
    /// `ServerEvent::get_function_call` extracts `call_id`, `item_id`, and
    /// `arguments` from this variant.
    #[serde(rename = "response.function_call_arguments.done")]
    ResponseFunctionCallArgumentsDone {
        arguments: String,
        call_id: String,
        event_id: String,
        item_id: String,
        name: String,
        output_index: u32,
        response_id: String,
    },

    // ---- Response MCP call events ----
    /// Emitted when MCP tool call arguments are updated.
    #[serde(rename = "response.mcp_call_arguments.delta")]
    ResponseMcpCallArgumentsDelta {
        delta: String,
        event_id: String,
        item_id: String,
        output_index: u32,
        response_id: String,
        obfuscation: Option<String>,
    },

    /// Emitted when MCP tool call arguments are finalized.
    #[serde(rename = "response.mcp_call_arguments.done")]
    ResponseMcpCallArgumentsDone {
        arguments: String,
        event_id: String,
        item_id: String,
        output_index: u32,
        response_id: String,
    },

    /// Emitted when an MCP tool call starts.
    #[serde(rename = "response.mcp_call.in_progress")]
    ResponseMcpCallInProgress {
        event_id: String,
        item_id: String,
        output_index: u32,
    },

    /// Emitted when an MCP tool call completes successfully.
    #[serde(rename = "response.mcp_call.completed")]
    ResponseMcpCallCompleted {
        event_id: String,
        item_id: String,
        output_index: u32,
    },

    /// Emitted when an MCP tool call fails.
    #[serde(rename = "response.mcp_call.failed")]
    ResponseMcpCallFailed {
        event_id: String,
        item_id: String,
        output_index: u32,
    },

    // ---- MCP list tools events ----
    /// Emitted when listing MCP tools is in progress.
    #[serde(rename = "mcp_list_tools.in_progress")]
    McpListToolsInProgress { event_id: String, item_id: String },

    /// Emitted when listing MCP tools has completed.
    #[serde(rename = "mcp_list_tools.completed")]
    McpListToolsCompleted { event_id: String, item_id: String },

    /// Emitted when listing MCP tools has failed.
    #[serde(rename = "mcp_list_tools.failed")]
    McpListToolsFailed { event_id: String, item_id: String },

    // ---- Rate limits ----
    /// Emitted at the beginning of a response with updated rate limit info.
    #[serde(rename = "rate_limits.updated")]
    RateLimitsUpdated {
        event_id: String,
        rate_limits: Vec<RealtimeRateLimit>,
    },

    // ---- Error ----
    /// Emitted when an error occurs. Most errors are recoverable.
    #[serde(rename = "error")]
    Error {
        error: RealtimeError,
        /// ID of this server event itself; `RealtimeError` carries its own,
        /// separate optional `event_id` field.
        event_id: String,
    },

    // ---- Unknown ----
    /// Unrecognized event type. Serde automatically deserializes any
    /// unrecognized `type` value into this variant (no data preserved).
    /// For proxy use, forward the raw frame instead of re-serializing:
    /// serializing this variant writes `"type": "Unknown"` and nothing else,
    /// so neither the original type string nor its payload can be recovered.
    #[serde(other)]
    Unknown,
}
602
603impl ServerEvent {
604    /// Returns the event type string (e.g. `"session.created"`).
605    ///
606    /// For known events, returns a `&'static str` from the event type constants.
607    /// For unknown events, returns `"unknown"`.
608    pub fn event_type(&self) -> &str {
609        self.to_event_type()
610            .map(|e| e.as_str())
611            .unwrap_or("unknown")
612    }
613
614    /// Maps this event to its corresponding `RealtimeServerEvent` constant.
615    ///
616    /// Returns `None` for `Unknown` events.
617    pub fn to_event_type(&self) -> Option<RealtimeServerEvent> {
618        match self {
619            ServerEvent::SessionCreated { .. } => Some(RealtimeServerEvent::SessionCreated),
620            ServerEvent::SessionUpdated { .. } => Some(RealtimeServerEvent::SessionUpdated),
621            ServerEvent::ConversationCreated { .. } => {
622                Some(RealtimeServerEvent::ConversationCreated)
623            }
624            ServerEvent::ConversationItemCreated { .. } => {
625                Some(RealtimeServerEvent::ConversationItemCreated)
626            }
627            ServerEvent::ConversationItemAdded { .. } => {
628                Some(RealtimeServerEvent::ConversationItemAdded)
629            }
630            ServerEvent::ConversationItemDone { .. } => {
631                Some(RealtimeServerEvent::ConversationItemDone)
632            }
633            ServerEvent::ConversationItemDeleted { .. } => {
634                Some(RealtimeServerEvent::ConversationItemDeleted)
635            }
636            ServerEvent::ConversationItemRetrieved { .. } => {
637                Some(RealtimeServerEvent::ConversationItemRetrieved)
638            }
639            ServerEvent::ConversationItemTruncated { .. } => {
640                Some(RealtimeServerEvent::ConversationItemTruncated)
641            }
642            ServerEvent::InputAudioTranscriptionCompleted { .. } => {
643                Some(RealtimeServerEvent::ConversationItemInputAudioTranscriptionCompleted)
644            }
645            ServerEvent::InputAudioTranscriptionDelta { .. } => {
646                Some(RealtimeServerEvent::ConversationItemInputAudioTranscriptionDelta)
647            }
648            ServerEvent::InputAudioTranscriptionFailed { .. } => {
649                Some(RealtimeServerEvent::ConversationItemInputAudioTranscriptionFailed)
650            }
651            ServerEvent::InputAudioTranscriptionSegment { .. } => {
652                Some(RealtimeServerEvent::ConversationItemInputAudioTranscriptionSegment)
653            }
654            ServerEvent::InputAudioBufferCleared { .. } => {
655                Some(RealtimeServerEvent::InputAudioBufferCleared)
656            }
657            ServerEvent::InputAudioBufferCommitted { .. } => {
658                Some(RealtimeServerEvent::InputAudioBufferCommitted)
659            }
660            ServerEvent::InputAudioBufferSpeechStarted { .. } => {
661                Some(RealtimeServerEvent::InputAudioBufferSpeechStarted)
662            }
663            ServerEvent::InputAudioBufferSpeechStopped { .. } => {
664                Some(RealtimeServerEvent::InputAudioBufferSpeechStopped)
665            }
666            ServerEvent::InputAudioBufferTimeoutTriggered { .. } => {
667                Some(RealtimeServerEvent::InputAudioBufferTimeoutTriggered)
668            }
669            ServerEvent::InputAudioBufferDtmfEventReceived { .. } => {
670                Some(RealtimeServerEvent::InputAudioBufferDtmfEventReceived)
671            }
672            ServerEvent::OutputAudioBufferStarted { .. } => {
673                Some(RealtimeServerEvent::OutputAudioBufferStarted)
674            }
675            ServerEvent::OutputAudioBufferStopped { .. } => {
676                Some(RealtimeServerEvent::OutputAudioBufferStopped)
677            }
678            ServerEvent::OutputAudioBufferCleared { .. } => {
679                Some(RealtimeServerEvent::OutputAudioBufferCleared)
680            }
681            ServerEvent::ResponseCreated { .. } => Some(RealtimeServerEvent::ResponseCreated),
682            ServerEvent::ResponseDone { .. } => Some(RealtimeServerEvent::ResponseDone),
683            ServerEvent::ResponseOutputItemAdded { .. } => {
684                Some(RealtimeServerEvent::ResponseOutputItemAdded)
685            }
686            ServerEvent::ResponseOutputItemDone { .. } => {
687                Some(RealtimeServerEvent::ResponseOutputItemDone)
688            }
689            ServerEvent::ResponseContentPartAdded { .. } => {
690                Some(RealtimeServerEvent::ResponseContentPartAdded)
691            }
692            ServerEvent::ResponseContentPartDone { .. } => {
693                Some(RealtimeServerEvent::ResponseContentPartDone)
694            }
695            ServerEvent::ResponseOutputTextDelta { .. } => {
696                Some(RealtimeServerEvent::ResponseOutputTextDelta)
697            }
698            ServerEvent::ResponseOutputTextDone { .. } => {
699                Some(RealtimeServerEvent::ResponseOutputTextDone)
700            }
701            ServerEvent::ResponseOutputAudioDelta { .. } => {
702                Some(RealtimeServerEvent::ResponseOutputAudioDelta)
703            }
704            ServerEvent::ResponseOutputAudioDone { .. } => {
705                Some(RealtimeServerEvent::ResponseOutputAudioDone)
706            }
707            ServerEvent::ResponseOutputAudioTranscriptDelta { .. } => {
708                Some(RealtimeServerEvent::ResponseOutputAudioTranscriptDelta)
709            }
710            ServerEvent::ResponseOutputAudioTranscriptDone { .. } => {
711                Some(RealtimeServerEvent::ResponseOutputAudioTranscriptDone)
712            }
713            ServerEvent::ResponseFunctionCallArgumentsDelta { .. } => {
714                Some(RealtimeServerEvent::ResponseFunctionCallArgumentsDelta)
715            }
716            ServerEvent::ResponseFunctionCallArgumentsDone { .. } => {
717                Some(RealtimeServerEvent::ResponseFunctionCallArgumentsDone)
718            }
719            ServerEvent::ResponseMcpCallArgumentsDelta { .. } => {
720                Some(RealtimeServerEvent::ResponseMcpCallArgumentsDelta)
721            }
722            ServerEvent::ResponseMcpCallArgumentsDone { .. } => {
723                Some(RealtimeServerEvent::ResponseMcpCallArgumentsDone)
724            }
725            ServerEvent::ResponseMcpCallInProgress { .. } => {
726                Some(RealtimeServerEvent::ResponseMcpCallInProgress)
727            }
728            ServerEvent::ResponseMcpCallCompleted { .. } => {
729                Some(RealtimeServerEvent::ResponseMcpCallCompleted)
730            }
731            ServerEvent::ResponseMcpCallFailed { .. } => {
732                Some(RealtimeServerEvent::ResponseMcpCallFailed)
733            }
734            ServerEvent::McpListToolsInProgress { .. } => {
735                Some(RealtimeServerEvent::McpListToolsInProgress)
736            }
737            ServerEvent::McpListToolsCompleted { .. } => {
738                Some(RealtimeServerEvent::McpListToolsCompleted)
739            }
740            ServerEvent::McpListToolsFailed { .. } => Some(RealtimeServerEvent::McpListToolsFailed),
741            ServerEvent::RateLimitsUpdated { .. } => Some(RealtimeServerEvent::RateLimitsUpdated),
742            ServerEvent::Error { .. } => Some(RealtimeServerEvent::Error),
743            ServerEvent::Unknown => None,
744        }
745    }
746
747    /// Returns true if this is a `response.function_call_arguments.done` event.
748    pub fn is_function_call_done(&self) -> bool {
749        matches!(self, ServerEvent::ResponseFunctionCallArgumentsDone { .. })
750    }
751
752    /// For `response.function_call_arguments.done`, returns `(call_id, item_id, arguments)`.
753    pub fn get_function_call(&self) -> Option<(&str, &str, &str)> {
754        match self {
755            ServerEvent::ResponseFunctionCallArgumentsDone {
756                call_id,
757                item_id,
758                arguments,
759                ..
760            } => Some((call_id, item_id, arguments)),
761            _ => None,
762        }
763    }
764}
765
766// ============================================================================
767// Session Config Union
768// ============================================================================
769
/// Union of realtime and transcription session configurations.
///
/// Discriminated by the `type` field: `"realtime"` or `"transcription"`.
/// Used by `session.update`, `session.created`, and `session.updated` events.
///
/// Both payloads are stored behind a `Box`, so the enum itself stays small
/// regardless of how large the session request structs are. There is no
/// catch-all variant: any other `type` value fails to deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum SessionConfig {
    /// Configuration for a realtime session (`"type": "realtime"`).
    #[serde(rename = "realtime")]
    Realtime(Box<RealtimeSessionCreateRequest>),
    /// Configuration for a transcription-only session (`"type": "transcription"`).
    #[serde(rename = "transcription")]
    Transcription(Box<RealtimeTranscriptionSessionCreateRequest>),
}
782
783// ============================================================================
784// Supporting Types
785// ============================================================================
786
/// Conversation metadata returned in `conversation.created` events.
///
/// Both fields are optional; `None` values are omitted when serializing
/// (`skip_serializing_none`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Conversation {
    /// Identifier of the conversation, if provided.
    pub id: Option<String>,
    /// Object type string, if provided.
    pub object: Option<String>,
}
794
/// A content part within a response (used in `response.content_part.*` events).
///
/// Every field is optional and is omitted from the JSON when `None`
/// (`skip_serializing_none`).
// NOTE(review): which of `audio` / `text` / `transcript` is populated
// presumably depends on the part's `type` — confirm against the API reference.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResponseContentPart {
    pub audio: Option<String>,
    pub text: Option<String>,
    pub transcript: Option<String>,
    /// Content part type; `type` is a Rust keyword, hence the raw identifier.
    /// Appears as `type` in the JSON.
    #[serde(rename = "type")]
    pub r#type: Option<String>,
}
805
/// Log probability entry for input audio transcription.
///
/// All three fields are required when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogProbProperties {
    /// The token this entry describes.
    pub token: String,
    /// UTF-8 byte values of the token. Serializes as a JSON array of integers
    /// (e.g. `[104, 101, 108, 108, 111]`), matching the OpenAI spec.
    pub bytes: Vec<u8>,
    /// Log probability of the token.
    pub logprob: f64,
}
815
/// Input token details for transcription usage.
///
/// Carried by `TranscriptionUsage::Tokens::input_token_details`. Both counts
/// are optional and omitted from the JSON when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TranscriptionTokenInputDetails {
    /// Number of audio input tokens, if reported.
    pub audio_tokens: Option<u32>,
    /// Number of text input tokens, if reported.
    pub text_tokens: Option<u32>,
}
823
/// Usage statistics for input audio transcription.
///
/// Discriminated by the `type` field: `"tokens"` or `"duration"`.
/// There is no catch-all variant: any other `type` value fails to deserialize.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum TranscriptionUsage {
    /// Token-based usage (e.g. for `gpt-4o-transcribe`).
    #[serde(rename = "tokens")]
    Tokens {
        input_tokens: u32,
        output_tokens: u32,
        total_tokens: u32,
        /// Optional breakdown of the input tokens; omitted when `None`.
        input_token_details: Option<TranscriptionTokenInputDetails>,
    },
    /// Duration-based usage (e.g. for `whisper-1`).
    #[serde(rename = "duration")]
    Duration { seconds: f64 },
}
843
/// Error details for a failed input audio transcription.
///
/// Carried by `ServerEvent::InputAudioTranscriptionFailed`. Every field is
/// optional and is omitted from the JSON when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TranscriptionError {
    pub code: Option<String>,
    pub message: Option<String>,
    pub param: Option<String>,
    /// Error type; `type` is a Rust keyword, hence the raw identifier.
    /// Appears as `type` in the JSON.
    #[serde(rename = "type")]
    pub r#type: Option<String>,
}
854
/// Rate limit information returned in `rate_limits.updated` events.
///
/// Every field is optional and is omitted from the JSON when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeRateLimit {
    pub limit: Option<u32>,
    pub name: Option<String>,
    pub remaining: Option<u32>,
    // NOTE(review): presumably the time until the limit resets, in seconds
    // (fractional values allowed by the `f64` type) — confirm.
    pub reset_seconds: Option<f64>,
}
864
/// Error details returned in the `error` server event.
///
/// `message` and `type` are required when deserializing; the remaining fields
/// are optional and omitted from the JSON when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeError {
    pub message: String,
    /// Error type; `type` is a Rust keyword, hence the raw identifier.
    /// Appears as `type` in the JSON.
    #[serde(rename = "type")]
    pub r#type: String,
    pub code: Option<String>,
    // NOTE(review): distinct from the `event_id` of the enclosing
    // `ServerEvent::Error`; presumably the ID of the client event that caused
    // the error — confirm against the API reference.
    pub event_id: Option<String>,
    pub param: Option<String>,
}
875}