Skip to main content

openai_tools/realtime/events/
server.rs

1//! Server-to-client events for the Realtime API.
2
3use serde::{Deserialize, Serialize};
4
5use crate::realtime::audio::{AudioFormat, Voice};
6use crate::realtime::conversation::ItemStatus;
7use crate::realtime::session::{MaxTokens, Modality, RealtimeTool, ToolChoice};
8use crate::realtime::vad::TurnDetection;
9
10/// Server events received from the OpenAI Realtime API.
11#[derive(Debug, Clone, Deserialize)]
12#[serde(tag = "type")]
13pub enum ServerEvent {
14    // ==================== Session Events ====================
15    /// Session was created (first event after connection).
16    #[serde(rename = "session.created")]
17    SessionCreated(SessionCreatedEvent),
18
19    /// Session configuration was updated.
20    #[serde(rename = "session.updated")]
21    SessionUpdated(SessionUpdatedEvent),
22
23    // ==================== Conversation Events ====================
24    /// Conversation was created.
25    #[serde(rename = "conversation.created")]
26    ConversationCreated(ConversationCreatedEvent),
27
28    /// Conversation item was created.
29    #[serde(rename = "conversation.item.created")]
30    ConversationItemCreated(ConversationItemCreatedEvent),
31
32    /// Conversation item was retrieved.
33    #[serde(rename = "conversation.item.retrieved")]
34    ConversationItemRetrieved(ConversationItemRetrievedEvent),
35
36    /// Conversation item was deleted.
37    #[serde(rename = "conversation.item.deleted")]
38    ConversationItemDeleted(ConversationItemDeletedEvent),
39
40    /// Conversation item was truncated.
41    #[serde(rename = "conversation.item.truncated")]
42    ConversationItemTruncated(ConversationItemTruncatedEvent),
43
44    /// Input audio transcription completed.
45    #[serde(rename = "conversation.item.input_audio_transcription.completed")]
46    InputAudioTranscriptionCompleted(InputAudioTranscriptionCompletedEvent),
47
48    /// Input audio transcription failed.
49    #[serde(rename = "conversation.item.input_audio_transcription.failed")]
50    InputAudioTranscriptionFailed(InputAudioTranscriptionFailedEvent),
51
52    // ==================== Input Audio Buffer Events ====================
53    /// Input audio buffer was committed.
54    #[serde(rename = "input_audio_buffer.committed")]
55    InputAudioBufferCommitted(InputAudioBufferCommittedEvent),
56
57    /// Input audio buffer was cleared.
58    #[serde(rename = "input_audio_buffer.cleared")]
59    InputAudioBufferCleared(InputAudioBufferClearedEvent),
60
61    /// Speech started in input audio.
62    #[serde(rename = "input_audio_buffer.speech_started")]
63    InputAudioBufferSpeechStarted(SpeechStartedEvent),
64
65    /// Speech stopped in input audio.
66    #[serde(rename = "input_audio_buffer.speech_stopped")]
67    InputAudioBufferSpeechStopped(SpeechStoppedEvent),
68
69    // ==================== Output Audio Buffer Events (WebRTC) ====================
70    /// Output audio buffer playback started.
71    #[serde(rename = "output_audio_buffer.started")]
72    OutputAudioBufferStarted(OutputAudioBufferEvent),
73
74    /// Output audio buffer playback stopped.
75    #[serde(rename = "output_audio_buffer.stopped")]
76    OutputAudioBufferStopped(OutputAudioBufferStoppedEvent),
77
78    /// Output audio buffer was cleared.
79    #[serde(rename = "output_audio_buffer.cleared")]
80    OutputAudioBufferCleared(OutputAudioBufferEvent),
81
82    // ==================== Response Events ====================
83    /// Response was created.
84    #[serde(rename = "response.created")]
85    ResponseCreated(ResponseCreatedEvent),
86
87    /// Response generation completed.
88    #[serde(rename = "response.done")]
89    ResponseDone(ResponseDoneEvent),
90
91    /// Output item was added to response.
92    #[serde(rename = "response.output_item.added")]
93    ResponseOutputItemAdded(ResponseOutputItemEvent),
94
95    /// Output item completed.
96    #[serde(rename = "response.output_item.done")]
97    ResponseOutputItemDone(ResponseOutputItemEvent),
98
99    /// Content part was added.
100    #[serde(rename = "response.content_part.added")]
101    ResponseContentPartAdded(ResponseContentPartEvent),
102
103    /// Content part completed.
104    #[serde(rename = "response.content_part.done")]
105    ResponseContentPartDone(ResponseContentPartEvent),
106
107    /// Text delta received.
108    #[serde(rename = "response.text.delta")]
109    ResponseTextDelta(ResponseTextDeltaEvent),
110
111    /// Text output completed.
112    #[serde(rename = "response.text.done")]
113    ResponseTextDone(ResponseTextDoneEvent),
114
115    /// Audio delta received.
116    #[serde(rename = "response.audio.delta")]
117    ResponseAudioDelta(ResponseAudioDeltaEvent),
118
119    /// Audio output completed.
120    #[serde(rename = "response.audio.done")]
121    ResponseAudioDone(ResponseAudioDoneEvent),
122
123    /// Audio transcript delta received.
124    #[serde(rename = "response.audio_transcript.delta")]
125    ResponseAudioTranscriptDelta(ResponseAudioTranscriptDeltaEvent),
126
127    /// Audio transcript completed.
128    #[serde(rename = "response.audio_transcript.done")]
129    ResponseAudioTranscriptDone(ResponseAudioTranscriptDoneEvent),
130
131    /// Function call arguments delta.
132    #[serde(rename = "response.function_call_arguments.delta")]
133    ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent),
134
135    /// Function call arguments completed.
136    #[serde(rename = "response.function_call_arguments.done")]
137    ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent),
138
139    // ==================== Rate Limits ====================
140    /// Rate limits updated.
141    #[serde(rename = "rate_limits.updated")]
142    RateLimitsUpdated(RateLimitsUpdatedEvent),
143
144    // ==================== Error ====================
145    /// Error occurred.
146    #[serde(rename = "error")]
147    Error(ErrorEvent),
148}
149
150// ==================== Session Event Types ====================
151
152/// Session created event payload.
153#[derive(Debug, Clone, Deserialize)]
154pub struct SessionCreatedEvent {
155    pub event_id: String,
156    pub session: SessionInfo,
157}
158
159/// Session updated event payload.
160#[derive(Debug, Clone, Deserialize)]
161pub struct SessionUpdatedEvent {
162    pub event_id: String,
163    pub session: SessionInfo,
164}
165
166/// Session information.
167#[derive(Debug, Clone, Deserialize)]
168pub struct SessionInfo {
169    pub id: String,
170    pub object: String,
171    pub model: String,
172    #[serde(default)]
173    pub modalities: Vec<Modality>,
174    #[serde(default)]
175    pub instructions: String,
176    pub voice: Option<Voice>,
177    pub input_audio_format: Option<AudioFormat>,
178    pub output_audio_format: Option<AudioFormat>,
179    pub turn_detection: Option<TurnDetection>,
180    #[serde(default)]
181    pub tools: Vec<RealtimeTool>,
182    pub tool_choice: Option<ToolChoice>,
183    pub temperature: Option<f32>,
184    pub max_response_output_tokens: Option<MaxTokens>,
185}
186
187// ==================== Conversation Event Types ====================
188
189/// Conversation created event.
190#[derive(Debug, Clone, Deserialize)]
191pub struct ConversationCreatedEvent {
192    pub event_id: String,
193    pub conversation: ConversationInfo,
194}
195
196/// Conversation information.
197#[derive(Debug, Clone, Deserialize)]
198pub struct ConversationInfo {
199    pub id: String,
200    pub object: String,
201}
202
203/// Conversation item created event.
204#[derive(Debug, Clone, Deserialize)]
205pub struct ConversationItemCreatedEvent {
206    pub event_id: String,
207    #[serde(default)]
208    pub previous_item_id: Option<String>,
209    pub item: ResponseItem,
210}
211
212/// Conversation item retrieved event.
213#[derive(Debug, Clone, Deserialize)]
214pub struct ConversationItemRetrievedEvent {
215    pub event_id: String,
216    pub item: ResponseItem,
217}
218
219/// Conversation item deleted event.
220#[derive(Debug, Clone, Deserialize)]
221pub struct ConversationItemDeletedEvent {
222    pub event_id: String,
223    pub item_id: String,
224}
225
226/// Conversation item truncated event.
227#[derive(Debug, Clone, Deserialize)]
228pub struct ConversationItemTruncatedEvent {
229    pub event_id: String,
230    pub item_id: String,
231    pub content_index: u32,
232    pub audio_end_ms: u32,
233}
234
235/// Input audio transcription completed event.
236#[derive(Debug, Clone, Deserialize)]
237pub struct InputAudioTranscriptionCompletedEvent {
238    pub event_id: String,
239    pub item_id: String,
240    pub content_index: u32,
241    pub transcript: String,
242}
243
244/// Input audio transcription failed event.
245#[derive(Debug, Clone, Deserialize)]
246pub struct InputAudioTranscriptionFailedEvent {
247    pub event_id: String,
248    pub item_id: String,
249    pub content_index: u32,
250    pub error: RealtimeError,
251}
252
253// ==================== Input Audio Buffer Event Types ====================
254
255/// Input audio buffer committed event.
256#[derive(Debug, Clone, Deserialize)]
257pub struct InputAudioBufferCommittedEvent {
258    pub event_id: String,
259    #[serde(default)]
260    pub previous_item_id: Option<String>,
261    pub item_id: String,
262}
263
264/// Input audio buffer cleared event.
265#[derive(Debug, Clone, Deserialize)]
266pub struct InputAudioBufferClearedEvent {
267    pub event_id: String,
268}
269
270/// Speech started event.
271#[derive(Debug, Clone, Deserialize)]
272pub struct SpeechStartedEvent {
273    pub event_id: String,
274    pub audio_start_ms: u32,
275    pub item_id: String,
276}
277
278/// Speech stopped event.
279#[derive(Debug, Clone, Deserialize)]
280pub struct SpeechStoppedEvent {
281    pub event_id: String,
282    pub audio_end_ms: u32,
283    #[serde(default)]
284    pub item_id: Option<String>,
285}
286
287// ==================== Output Audio Buffer Event Types ====================
288
289/// Output audio buffer event (started/cleared).
290#[derive(Debug, Clone, Deserialize)]
291pub struct OutputAudioBufferEvent {
292    pub event_id: String,
293    pub response_id: String,
294}
295
296/// Output audio buffer stopped event.
297#[derive(Debug, Clone, Deserialize)]
298pub struct OutputAudioBufferStoppedEvent {
299    pub event_id: String,
300    pub response_id: String,
301    pub audio_end_ms: u32,
302    pub item_id: String,
303}
304
305// ==================== Response Event Types ====================
306
307/// Response created event.
308#[derive(Debug, Clone, Deserialize)]
309pub struct ResponseCreatedEvent {
310    pub event_id: String,
311    pub response: ResponseInfo,
312}
313
314/// Response done event.
315#[derive(Debug, Clone, Deserialize)]
316pub struct ResponseDoneEvent {
317    pub event_id: String,
318    pub response: ResponseInfo,
319}
320
321/// Response information.
322#[derive(Debug, Clone, Deserialize)]
323pub struct ResponseInfo {
324    pub id: String,
325    pub object: String,
326    pub status: ResponseStatus,
327    #[serde(default)]
328    pub status_details: Option<serde_json::Value>,
329    #[serde(default)]
330    pub output: Vec<ResponseItem>,
331    #[serde(default)]
332    pub usage: Option<RealtimeUsage>,
333}
334
335/// Response status.
336#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
337#[serde(rename_all = "snake_case")]
338pub enum ResponseStatus {
339    InProgress,
340    Completed,
341    Cancelled,
342    Incomplete,
343    Failed,
344}
345
346/// Response item (output item in a response).
347#[derive(Debug, Clone, Deserialize)]
348pub struct ResponseItem {
349    pub id: String,
350    pub object: String,
351    #[serde(rename = "type")]
352    pub item_type: String,
353    #[serde(default)]
354    pub role: Option<String>,
355    #[serde(default)]
356    pub content: Vec<ResponseContentPart>,
357    #[serde(default)]
358    pub status: Option<ItemStatus>,
359    // Function call fields
360    #[serde(default)]
361    pub call_id: Option<String>,
362    #[serde(default)]
363    pub name: Option<String>,
364    #[serde(default)]
365    pub arguments: Option<String>,
366    #[serde(default)]
367    pub output: Option<String>,
368}
369
370/// Response content part.
371#[derive(Debug, Clone, Deserialize)]
372pub struct ResponseContentPart {
373    #[serde(rename = "type")]
374    pub content_type: String,
375    #[serde(default)]
376    pub text: Option<String>,
377    #[serde(default)]
378    pub audio: Option<String>,
379    #[serde(default)]
380    pub transcript: Option<String>,
381}
382
383/// Response output item event.
384#[derive(Debug, Clone, Deserialize)]
385pub struct ResponseOutputItemEvent {
386    pub event_id: String,
387    pub response_id: String,
388    pub output_index: u32,
389    pub item: ResponseItem,
390}
391
392/// Response content part event.
393#[derive(Debug, Clone, Deserialize)]
394pub struct ResponseContentPartEvent {
395    pub event_id: String,
396    pub response_id: String,
397    pub item_id: String,
398    pub output_index: u32,
399    pub content_index: u32,
400    pub part: ResponseContentPart,
401}
402
403/// Response text delta event.
404#[derive(Debug, Clone, Deserialize)]
405pub struct ResponseTextDeltaEvent {
406    pub event_id: String,
407    pub response_id: String,
408    pub item_id: String,
409    pub output_index: u32,
410    pub content_index: u32,
411    pub delta: String,
412}
413
414/// Response text done event.
415#[derive(Debug, Clone, Deserialize)]
416pub struct ResponseTextDoneEvent {
417    pub event_id: String,
418    pub response_id: String,
419    pub item_id: String,
420    pub output_index: u32,
421    pub content_index: u32,
422    pub text: String,
423}
424
425/// Response audio delta event.
426#[derive(Debug, Clone, Deserialize)]
427pub struct ResponseAudioDeltaEvent {
428    pub event_id: String,
429    pub response_id: String,
430    pub item_id: String,
431    pub output_index: u32,
432    pub content_index: u32,
433    /// Base64-encoded audio chunk.
434    pub delta: String,
435}
436
437/// Response audio done event.
438#[derive(Debug, Clone, Deserialize)]
439pub struct ResponseAudioDoneEvent {
440    pub event_id: String,
441    pub response_id: String,
442    pub item_id: String,
443    pub output_index: u32,
444    pub content_index: u32,
445}
446
447/// Response audio transcript delta event.
448#[derive(Debug, Clone, Deserialize)]
449pub struct ResponseAudioTranscriptDeltaEvent {
450    pub event_id: String,
451    pub response_id: String,
452    pub item_id: String,
453    pub output_index: u32,
454    pub content_index: u32,
455    pub delta: String,
456}
457
458/// Response audio transcript done event.
459#[derive(Debug, Clone, Deserialize)]
460pub struct ResponseAudioTranscriptDoneEvent {
461    pub event_id: String,
462    pub response_id: String,
463    pub item_id: String,
464    pub output_index: u32,
465    pub content_index: u32,
466    pub transcript: String,
467}
468
469/// Response function call arguments delta event.
470#[derive(Debug, Clone, Deserialize)]
471pub struct ResponseFunctionCallArgumentsDeltaEvent {
472    pub event_id: String,
473    pub response_id: String,
474    pub item_id: String,
475    pub output_index: u32,
476    pub call_id: String,
477    pub delta: String,
478}
479
480/// Response function call arguments done event.
481#[derive(Debug, Clone, Deserialize)]
482pub struct ResponseFunctionCallArgumentsDoneEvent {
483    pub event_id: String,
484    pub response_id: String,
485    pub item_id: String,
486    pub output_index: u32,
487    pub call_id: String,
488    pub name: String,
489    pub arguments: String,
490}
491
492// ==================== Rate Limits ====================
493
494/// Rate limits updated event.
495#[derive(Debug, Clone, Deserialize)]
496pub struct RateLimitsUpdatedEvent {
497    pub event_id: String,
498    pub rate_limits: Vec<RateLimit>,
499}
500
501/// Rate limit information.
502#[derive(Debug, Clone, Deserialize)]
503pub struct RateLimit {
504    pub name: String,
505    pub limit: u32,
506    pub remaining: u32,
507    pub reset_seconds: f32,
508}
509
510// ==================== Error ====================
511
512/// Error event.
513#[derive(Debug, Clone, Deserialize)]
514pub struct ErrorEvent {
515    pub event_id: String,
516    pub error: RealtimeError,
517}
518
519/// Realtime API error.
520#[derive(Debug, Clone, Deserialize)]
521pub struct RealtimeError {
522    #[serde(rename = "type")]
523    pub error_type: Option<String>,
524    pub code: Option<String>,
525    pub message: String,
526    #[serde(default)]
527    pub param: Option<String>,
528    #[serde(default)]
529    pub event_id: Option<String>,
530}
531
532// ==================== Usage ====================
533
534/// Token usage information.
535#[derive(Debug, Clone, Default, Deserialize)]
536pub struct RealtimeUsage {
537    pub total_tokens: u32,
538    pub input_tokens: u32,
539    pub output_tokens: u32,
540    #[serde(default)]
541    pub input_token_details: Option<InputTokenDetails>,
542    #[serde(default)]
543    pub output_token_details: Option<OutputTokenDetails>,
544}
545
546/// Input token details.
547#[derive(Debug, Clone, Default, Deserialize)]
548pub struct InputTokenDetails {
549    #[serde(default)]
550    pub cached_tokens: u32,
551    #[serde(default)]
552    pub text_tokens: u32,
553    #[serde(default)]
554    pub audio_tokens: u32,
555}
556
557/// Output token details.
558#[derive(Debug, Clone, Default, Deserialize)]
559pub struct OutputTokenDetails {
560    #[serde(default)]
561    pub text_tokens: u32,
562    #[serde(default)]
563    pub audio_tokens: u32,
564}
565
566impl ServerEvent {
567    /// Check if this is an error event.
568    pub fn is_error(&self) -> bool {
569        matches!(self, Self::Error(_))
570    }
571
572    /// Get the event ID if available.
573    pub fn event_id(&self) -> Option<&str> {
574        match self {
575            Self::SessionCreated(e) => Some(&e.event_id),
576            Self::SessionUpdated(e) => Some(&e.event_id),
577            Self::ConversationCreated(e) => Some(&e.event_id),
578            Self::ConversationItemCreated(e) => Some(&e.event_id),
579            Self::ConversationItemRetrieved(e) => Some(&e.event_id),
580            Self::ConversationItemDeleted(e) => Some(&e.event_id),
581            Self::ConversationItemTruncated(e) => Some(&e.event_id),
582            Self::InputAudioTranscriptionCompleted(e) => Some(&e.event_id),
583            Self::InputAudioTranscriptionFailed(e) => Some(&e.event_id),
584            Self::InputAudioBufferCommitted(e) => Some(&e.event_id),
585            Self::InputAudioBufferCleared(e) => Some(&e.event_id),
586            Self::InputAudioBufferSpeechStarted(e) => Some(&e.event_id),
587            Self::InputAudioBufferSpeechStopped(e) => Some(&e.event_id),
588            Self::OutputAudioBufferStarted(e) => Some(&e.event_id),
589            Self::OutputAudioBufferStopped(e) => Some(&e.event_id),
590            Self::OutputAudioBufferCleared(e) => Some(&e.event_id),
591            Self::ResponseCreated(e) => Some(&e.event_id),
592            Self::ResponseDone(e) => Some(&e.event_id),
593            Self::ResponseOutputItemAdded(e) => Some(&e.event_id),
594            Self::ResponseOutputItemDone(e) => Some(&e.event_id),
595            Self::ResponseContentPartAdded(e) => Some(&e.event_id),
596            Self::ResponseContentPartDone(e) => Some(&e.event_id),
597            Self::ResponseTextDelta(e) => Some(&e.event_id),
598            Self::ResponseTextDone(e) => Some(&e.event_id),
599            Self::ResponseAudioDelta(e) => Some(&e.event_id),
600            Self::ResponseAudioDone(e) => Some(&e.event_id),
601            Self::ResponseAudioTranscriptDelta(e) => Some(&e.event_id),
602            Self::ResponseAudioTranscriptDone(e) => Some(&e.event_id),
603            Self::ResponseFunctionCallArgumentsDelta(e) => Some(&e.event_id),
604            Self::ResponseFunctionCallArgumentsDone(e) => Some(&e.event_id),
605            Self::RateLimitsUpdated(e) => Some(&e.event_id),
606            Self::Error(e) => Some(&e.event_id),
607        }
608    }
609}