1use serde::{Deserialize, Serialize};
4
5use crate::realtime::audio::{AudioFormat, Voice};
6use crate::realtime::conversation::ItemStatus;
7use crate::realtime::session::{MaxTokens, Modality, RealtimeTool, ToolChoice};
8use crate::realtime::vad::TurnDetection;
9
/// Tagged union of every server event type this module can deserialize.
///
/// The enum is internally tagged (`#[serde(tag = "type")]`): serde reads the
/// `type` field of the incoming object, picks the variant whose `rename` string
/// matches it, and deserializes the remaining fields into that variant's payload
/// struct. There is no catch-all variant, so a `type` value that is not listed
/// here is reported as a deserialization error.
///
/// Only `Deserialize` is derived; these events cannot be serialized back.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type")]
pub enum ServerEvent {
    // ---- session.* ----
    /// Wire type `session.created`.
    #[serde(rename = "session.created")]
    SessionCreated(SessionCreatedEvent),

    /// Wire type `session.updated`.
    #[serde(rename = "session.updated")]
    SessionUpdated(SessionUpdatedEvent),

    // ---- conversation.* ----
    /// Wire type `conversation.created`.
    #[serde(rename = "conversation.created")]
    ConversationCreated(ConversationCreatedEvent),

    /// Wire type `conversation.item.created`.
    #[serde(rename = "conversation.item.created")]
    ConversationItemCreated(ConversationItemCreatedEvent),

    /// Wire type `conversation.item.retrieved`.
    #[serde(rename = "conversation.item.retrieved")]
    ConversationItemRetrieved(ConversationItemRetrievedEvent),

    /// Wire type `conversation.item.deleted`.
    #[serde(rename = "conversation.item.deleted")]
    ConversationItemDeleted(ConversationItemDeletedEvent),

    /// Wire type `conversation.item.truncated`.
    #[serde(rename = "conversation.item.truncated")]
    ConversationItemTruncated(ConversationItemTruncatedEvent),

    /// Wire type `conversation.item.input_audio_transcription.completed`.
    #[serde(rename = "conversation.item.input_audio_transcription.completed")]
    InputAudioTranscriptionCompleted(InputAudioTranscriptionCompletedEvent),

    /// Wire type `conversation.item.input_audio_transcription.failed`.
    #[serde(rename = "conversation.item.input_audio_transcription.failed")]
    InputAudioTranscriptionFailed(InputAudioTranscriptionFailedEvent),

    // ---- input_audio_buffer.* ----
    /// Wire type `input_audio_buffer.committed`.
    #[serde(rename = "input_audio_buffer.committed")]
    InputAudioBufferCommitted(InputAudioBufferCommittedEvent),

    /// Wire type `input_audio_buffer.cleared`.
    #[serde(rename = "input_audio_buffer.cleared")]
    InputAudioBufferCleared(InputAudioBufferClearedEvent),

    /// Wire type `input_audio_buffer.speech_started`.
    #[serde(rename = "input_audio_buffer.speech_started")]
    InputAudioBufferSpeechStarted(SpeechStartedEvent),

    /// Wire type `input_audio_buffer.speech_stopped`.
    #[serde(rename = "input_audio_buffer.speech_stopped")]
    InputAudioBufferSpeechStopped(SpeechStoppedEvent),

    // ---- output_audio_buffer.* ----
    /// Wire type `output_audio_buffer.started`; shares its payload type with
    /// [`ServerEvent::OutputAudioBufferCleared`].
    #[serde(rename = "output_audio_buffer.started")]
    OutputAudioBufferStarted(OutputAudioBufferEvent),

    /// Wire type `output_audio_buffer.stopped`.
    #[serde(rename = "output_audio_buffer.stopped")]
    OutputAudioBufferStopped(OutputAudioBufferStoppedEvent),

    /// Wire type `output_audio_buffer.cleared`.
    #[serde(rename = "output_audio_buffer.cleared")]
    OutputAudioBufferCleared(OutputAudioBufferEvent),

    // ---- response.* ----
    /// Wire type `response.created`.
    #[serde(rename = "response.created")]
    ResponseCreated(ResponseCreatedEvent),

    /// Wire type `response.done`.
    #[serde(rename = "response.done")]
    ResponseDone(ResponseDoneEvent),

    /// Wire type `response.output_item.added`; shares its payload type with
    /// [`ServerEvent::ResponseOutputItemDone`].
    #[serde(rename = "response.output_item.added")]
    ResponseOutputItemAdded(ResponseOutputItemEvent),

    /// Wire type `response.output_item.done`.
    #[serde(rename = "response.output_item.done")]
    ResponseOutputItemDone(ResponseOutputItemEvent),

    /// Wire type `response.content_part.added`; shares its payload type with
    /// [`ServerEvent::ResponseContentPartDone`].
    #[serde(rename = "response.content_part.added")]
    ResponseContentPartAdded(ResponseContentPartEvent),

    /// Wire type `response.content_part.done`.
    #[serde(rename = "response.content_part.done")]
    ResponseContentPartDone(ResponseContentPartEvent),

    /// Wire type `response.text.delta`.
    #[serde(rename = "response.text.delta")]
    ResponseTextDelta(ResponseTextDeltaEvent),

    /// Wire type `response.text.done`.
    #[serde(rename = "response.text.done")]
    ResponseTextDone(ResponseTextDoneEvent),

    /// Wire type `response.audio.delta`.
    #[serde(rename = "response.audio.delta")]
    ResponseAudioDelta(ResponseAudioDeltaEvent),

    /// Wire type `response.audio.done`.
    #[serde(rename = "response.audio.done")]
    ResponseAudioDone(ResponseAudioDoneEvent),

    /// Wire type `response.audio_transcript.delta`.
    #[serde(rename = "response.audio_transcript.delta")]
    ResponseAudioTranscriptDelta(ResponseAudioTranscriptDeltaEvent),

    /// Wire type `response.audio_transcript.done`.
    #[serde(rename = "response.audio_transcript.done")]
    ResponseAudioTranscriptDone(ResponseAudioTranscriptDoneEvent),

    /// Wire type `response.function_call_arguments.delta`.
    #[serde(rename = "response.function_call_arguments.delta")]
    ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent),

    /// Wire type `response.function_call_arguments.done`.
    #[serde(rename = "response.function_call_arguments.done")]
    ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent),

    // ---- miscellaneous ----
    /// Wire type `rate_limits.updated`.
    #[serde(rename = "rate_limits.updated")]
    RateLimitsUpdated(RateLimitsUpdatedEvent),

    /// Wire type `error`.
    #[serde(rename = "error")]
    Error(ErrorEvent),
}
149
/// Payload of [`ServerEvent::SessionCreated`] (wire type `session.created`).
#[derive(Debug, Clone, Deserialize)]
pub struct SessionCreatedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// Session description carried by the event.
    pub session: SessionInfo,
}
158
/// Payload of [`ServerEvent::SessionUpdated`] (wire type `session.updated`).
///
/// Has the same shape as [`SessionCreatedEvent`].
#[derive(Debug, Clone, Deserialize)]
pub struct SessionUpdatedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// Session description carried by the event.
    pub session: SessionInfo,
}
165
/// Session description embedded in [`SessionCreatedEvent`] and
/// [`SessionUpdatedEvent`].
///
/// `id`, `object` and `model` are required. Fields marked `#[serde(default)]`
/// fall back to an empty value when absent, and `Option` fields become `None`
/// when absent.
#[derive(Debug, Clone, Deserialize)]
pub struct SessionInfo {
    pub id: String,
    pub object: String,
    pub model: String,
    /// Empty when the field is missing from the payload.
    #[serde(default)]
    pub modalities: Vec<Modality>,
    /// Empty string when the field is missing from the payload.
    #[serde(default)]
    pub instructions: String,
    pub voice: Option<Voice>,
    pub input_audio_format: Option<AudioFormat>,
    pub output_audio_format: Option<AudioFormat>,
    pub turn_detection: Option<TurnDetection>,
    /// Empty when the field is missing from the payload.
    #[serde(default)]
    pub tools: Vec<RealtimeTool>,
    pub tool_choice: Option<ToolChoice>,
    pub temperature: Option<f32>,
    pub max_response_output_tokens: Option<MaxTokens>,
}
186
/// Payload of [`ServerEvent::ConversationCreated`] (wire type
/// `conversation.created`).
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationCreatedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// Conversation descriptor carried by the event.
    pub conversation: ConversationInfo,
}
195
/// Conversation descriptor embedded in [`ConversationCreatedEvent`].
///
/// Both fields are required strings taken verbatim from the payload.
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationInfo {
    pub id: String,
    pub object: String,
}
202
/// Payload of [`ServerEvent::ConversationItemCreated`] (wire type
/// `conversation.item.created`).
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationItemCreatedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// `None` when the field is absent or null.
    #[serde(default)]
    pub previous_item_id: Option<String>,
    /// The item carried by the event.
    pub item: ResponseItem,
}
211
/// Payload of [`ServerEvent::ConversationItemRetrieved`] (wire type
/// `conversation.item.retrieved`).
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationItemRetrievedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// The item carried by the event.
    pub item: ResponseItem,
}
218
/// Payload of [`ServerEvent::ConversationItemDeleted`] (wire type
/// `conversation.item.deleted`).
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationItemDeletedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// Value of the event's `item_id` field (required).
    pub item_id: String,
}
225
/// Payload of [`ServerEvent::ConversationItemTruncated`] (wire type
/// `conversation.item.truncated`). All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationItemTruncatedEvent {
    pub event_id: String,
    pub item_id: String,
    pub content_index: u32,
    // NOTE(review): presumably milliseconds, going by the `_ms` suffix — confirm
    // against the API reference.
    pub audio_end_ms: u32,
}
234
/// Payload of [`ServerEvent::InputAudioTranscriptionCompleted`] (wire type
/// `conversation.item.input_audio_transcription.completed`). All fields are
/// required.
#[derive(Debug, Clone, Deserialize)]
pub struct InputAudioTranscriptionCompletedEvent {
    pub event_id: String,
    pub item_id: String,
    pub content_index: u32,
    /// Transcript text carried by the event.
    pub transcript: String,
}
243
/// Payload of [`ServerEvent::InputAudioTranscriptionFailed`] (wire type
/// `conversation.item.input_audio_transcription.failed`). All fields are
/// required.
#[derive(Debug, Clone, Deserialize)]
pub struct InputAudioTranscriptionFailedEvent {
    pub event_id: String,
    pub item_id: String,
    pub content_index: u32,
    /// Error details, using the same structure as [`ErrorEvent::error`].
    pub error: RealtimeError,
}
252
/// Payload of [`ServerEvent::InputAudioBufferCommitted`] (wire type
/// `input_audio_buffer.committed`).
#[derive(Debug, Clone, Deserialize)]
pub struct InputAudioBufferCommittedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// `None` when the field is absent or null.
    #[serde(default)]
    pub previous_item_id: Option<String>,
    /// Value of the event's `item_id` field (required).
    pub item_id: String,
}
263
/// Payload of [`ServerEvent::InputAudioBufferCleared`] (wire type
/// `input_audio_buffer.cleared`); carries only the event identifier.
#[derive(Debug, Clone, Deserialize)]
pub struct InputAudioBufferClearedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
}
269
/// Payload of [`ServerEvent::InputAudioBufferSpeechStarted`] (wire type
/// `input_audio_buffer.speech_started`). All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct SpeechStartedEvent {
    pub event_id: String,
    // NOTE(review): presumably milliseconds, going by the `_ms` suffix — confirm.
    pub audio_start_ms: u32,
    /// Required here, whereas [`SpeechStoppedEvent::item_id`] is optional.
    pub item_id: String,
}
277
/// Payload of [`ServerEvent::InputAudioBufferSpeechStopped`] (wire type
/// `input_audio_buffer.speech_stopped`).
#[derive(Debug, Clone, Deserialize)]
pub struct SpeechStoppedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    // NOTE(review): presumably milliseconds, going by the `_ms` suffix — confirm.
    pub audio_end_ms: u32,
    /// `None` when the field is absent or null.
    #[serde(default)]
    pub item_id: Option<String>,
}
286
/// Payload shared by [`ServerEvent::OutputAudioBufferStarted`] and
/// [`ServerEvent::OutputAudioBufferCleared`]. Both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct OutputAudioBufferEvent {
    pub event_id: String,
    pub response_id: String,
}
295
/// Payload of [`ServerEvent::OutputAudioBufferStopped`] (wire type
/// `output_audio_buffer.stopped`).
///
/// All four fields are required: a payload missing `audio_end_ms` or `item_id`
/// fails to deserialize.
#[derive(Debug, Clone, Deserialize)]
pub struct OutputAudioBufferStoppedEvent {
    pub event_id: String,
    pub response_id: String,
    // NOTE(review): presumably milliseconds, going by the `_ms` suffix — confirm.
    pub audio_end_ms: u32,
    pub item_id: String,
}
304
/// Payload of [`ServerEvent::ResponseCreated`] (wire type `response.created`).
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseCreatedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// Response description carried by the event.
    pub response: ResponseInfo,
}
313
/// Payload of [`ServerEvent::ResponseDone`] (wire type `response.done`).
///
/// Has the same shape as [`ResponseCreatedEvent`].
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseDoneEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// Response description carried by the event.
    pub response: ResponseInfo,
}
320
/// Response description embedded in [`ResponseCreatedEvent`] and
/// [`ResponseDoneEvent`].
///
/// `id`, `object` and `status` are required; the remaining fields fall back to
/// `None` or an empty vector when absent.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseInfo {
    pub id: String,
    pub object: String,
    pub status: ResponseStatus,
    /// Kept as untyped JSON; `None` when absent or null.
    #[serde(default)]
    pub status_details: Option<serde_json::Value>,
    /// Empty when the field is missing from the payload.
    #[serde(default)]
    pub output: Vec<ResponseItem>,
    /// `None` when absent or null.
    #[serde(default)]
    pub usage: Option<RealtimeUsage>,
}
334
/// Status value of a [`ResponseInfo`].
///
/// Variants are (de)serialized in `snake_case`, e.g. `InProgress` maps to
/// `"in_progress"`. Any other string fails to deserialize because there is no
/// fallback variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResponseStatus {
    InProgress,
    Completed,
    Cancelled,
    Incomplete,
    Failed,
}
345
/// An item as it appears in conversation-item events, output-item events and
/// [`ResponseInfo::output`].
///
/// `id`, `object` and `type` are required. Every other field is optional on the
/// wire and defaults to `None` or an empty vector.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseItem {
    pub id: String,
    pub object: String,
    /// Read from the JSON field `type` and kept as a free-form string.
    #[serde(rename = "type")]
    pub item_type: String,
    #[serde(default)]
    pub role: Option<String>,
    /// Empty when the field is missing from the payload.
    #[serde(default)]
    pub content: Vec<ResponseContentPart>,
    #[serde(default)]
    pub status: Option<ItemStatus>,
    // NOTE(review): the four fields below look like function-call data
    // (`call_id`, `name`, `arguments`, `output`) — presumably only present on
    // function-call items; confirm against the API reference.
    #[serde(default)]
    pub call_id: Option<String>,
    #[serde(default)]
    pub name: Option<String>,
    #[serde(default)]
    pub arguments: Option<String>,
    #[serde(default)]
    pub output: Option<String>,
}
369
/// One content part of a [`ResponseItem`], also carried by
/// [`ResponseContentPartEvent`].
///
/// Only `type` is required; `text`, `audio` and `transcript` are each `None`
/// when absent or null.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseContentPart {
    /// Read from the JSON field `type` and kept as a free-form string.
    #[serde(rename = "type")]
    pub content_type: String,
    #[serde(default)]
    pub text: Option<String>,
    // NOTE(review): stored as a string; the encoding (presumably base64) is not
    // visible here — confirm.
    #[serde(default)]
    pub audio: Option<String>,
    #[serde(default)]
    pub transcript: Option<String>,
}
382
/// Payload shared by [`ServerEvent::ResponseOutputItemAdded`] and
/// [`ServerEvent::ResponseOutputItemDone`]. All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseOutputItemEvent {
    pub event_id: String,
    pub response_id: String,
    pub output_index: u32,
    /// The item carried by the event.
    pub item: ResponseItem,
}
391
/// Payload shared by [`ServerEvent::ResponseContentPartAdded`] and
/// [`ServerEvent::ResponseContentPartDone`]. All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseContentPartEvent {
    pub event_id: String,
    pub response_id: String,
    pub item_id: String,
    pub output_index: u32,
    pub content_index: u32,
    /// The content part carried by the event.
    pub part: ResponseContentPart,
}
402
/// Payload of [`ServerEvent::ResponseTextDelta`] (wire type
/// `response.text.delta`). All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseTextDeltaEvent {
    pub event_id: String,
    pub response_id: String,
    pub item_id: String,
    pub output_index: u32,
    pub content_index: u32,
    /// Value of the event's `delta` field.
    pub delta: String,
}
413
/// Payload of [`ServerEvent::ResponseTextDone`] (wire type
/// `response.text.done`). All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseTextDoneEvent {
    pub event_id: String,
    pub response_id: String,
    pub item_id: String,
    pub output_index: u32,
    pub content_index: u32,
    /// Value of the event's `text` field.
    pub text: String,
}
424
/// Payload of [`ServerEvent::ResponseAudioDelta`] (wire type
/// `response.audio.delta`). All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseAudioDeltaEvent {
    pub event_id: String,
    pub response_id: String,
    pub item_id: String,
    pub output_index: u32,
    pub content_index: u32,
    // NOTE(review): audio is carried as a string; presumably base64-encoded —
    // confirm before decoding.
    pub delta: String,
}
436
/// Payload of [`ServerEvent::ResponseAudioDone`] (wire type
/// `response.audio.done`).
///
/// Carries only identifiers and indices, with no audio data. All fields are
/// required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseAudioDoneEvent {
    pub event_id: String,
    pub response_id: String,
    pub item_id: String,
    pub output_index: u32,
    pub content_index: u32,
}
446
/// Payload of [`ServerEvent::ResponseAudioTranscriptDelta`] (wire type
/// `response.audio_transcript.delta`). All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseAudioTranscriptDeltaEvent {
    pub event_id: String,
    pub response_id: String,
    pub item_id: String,
    pub output_index: u32,
    pub content_index: u32,
    /// Value of the event's `delta` field.
    pub delta: String,
}
457
/// Payload of [`ServerEvent::ResponseAudioTranscriptDone`] (wire type
/// `response.audio_transcript.done`). All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseAudioTranscriptDoneEvent {
    pub event_id: String,
    pub response_id: String,
    pub item_id: String,
    pub output_index: u32,
    pub content_index: u32,
    /// Value of the event's `transcript` field.
    pub transcript: String,
}
468
/// Payload of [`ServerEvent::ResponseFunctionCallArgumentsDelta`] (wire type
/// `response.function_call_arguments.delta`). All fields are required.
///
/// Unlike the text and audio delta events, this one has no `content_index`.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseFunctionCallArgumentsDeltaEvent {
    pub event_id: String,
    pub response_id: String,
    pub item_id: String,
    pub output_index: u32,
    pub call_id: String,
    /// Value of the event's `delta` field.
    pub delta: String,
}
479
/// Payload of [`ServerEvent::ResponseFunctionCallArgumentsDone`] (wire type
/// `response.function_call_arguments.done`).
///
/// Every field, including `name`, is required: a payload without it fails to
/// deserialize.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseFunctionCallArgumentsDoneEvent {
    pub event_id: String,
    pub response_id: String,
    pub item_id: String,
    pub output_index: u32,
    pub call_id: String,
    pub name: String,
    // NOTE(review): kept as a raw string; presumably JSON-encoded arguments that
    // the caller must parse — confirm.
    pub arguments: String,
}
491
/// Payload of [`ServerEvent::RateLimitsUpdated`] (wire type
/// `rate_limits.updated`).
#[derive(Debug, Clone, Deserialize)]
pub struct RateLimitsUpdatedEvent {
    /// Value of the event's `event_id` field (required).
    pub event_id: String,
    /// Required list of rate-limit entries; there is no default, so the field
    /// must be present.
    pub rate_limits: Vec<RateLimit>,
}
500
/// One entry of [`RateLimitsUpdatedEvent::rate_limits`]. All fields are
/// required.
#[derive(Debug, Clone, Deserialize)]
pub struct RateLimit {
    pub name: String,
    pub limit: u32,
    pub remaining: u32,
    // NOTE(review): presumably seconds until the limit resets, going by the
    // field name — confirm.
    pub reset_seconds: f32,
}
509
/// Payload of [`ServerEvent::Error`] (wire type `error`).
#[derive(Debug, Clone, Deserialize)]
pub struct ErrorEvent {
    /// Identifier of this error event itself. It is distinct from
    /// [`RealtimeError::event_id`], which is a separate optional field inside
    /// `error`.
    pub event_id: String,
    /// Error details carried by the event.
    pub error: RealtimeError,
}
518
/// Error details used by [`ErrorEvent`] and
/// [`InputAudioTranscriptionFailedEvent`].
///
/// Only `message` is required; every `Option` field is `None` when absent or
/// null.
#[derive(Debug, Clone, Deserialize)]
pub struct RealtimeError {
    /// Read from the JSON field `type`.
    #[serde(rename = "type")]
    pub error_type: Option<String>,
    pub code: Option<String>,
    pub message: String,
    #[serde(default)]
    pub param: Option<String>,
    // NOTE(review): presumably the id of the client event that triggered the
    // error — confirm against the API reference.
    #[serde(default)]
    pub event_id: Option<String>,
}
531
/// Token usage reported in [`ResponseInfo::usage`].
///
/// The three totals are required whenever a usage object is present. The
/// detail breakdowns are optional and `None` when absent or null. `Default`
/// yields all-zero totals with no breakdowns.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct RealtimeUsage {
    pub total_tokens: u32,
    pub input_tokens: u32,
    pub output_tokens: u32,
    #[serde(default)]
    pub input_token_details: Option<InputTokenDetails>,
    #[serde(default)]
    pub output_token_details: Option<OutputTokenDetails>,
}
545
/// Breakdown of input tokens in [`RealtimeUsage::input_token_details`].
///
/// Every counter defaults to `0` when missing from the payload.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct InputTokenDetails {
    #[serde(default)]
    pub cached_tokens: u32,
    #[serde(default)]
    pub text_tokens: u32,
    #[serde(default)]
    pub audio_tokens: u32,
}
556
/// Breakdown of output tokens in [`RealtimeUsage::output_token_details`].
///
/// Every counter defaults to `0` when missing from the payload.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct OutputTokenDetails {
    #[serde(default)]
    pub text_tokens: u32,
    #[serde(default)]
    pub audio_tokens: u32,
}
565
566impl ServerEvent {
567 pub fn is_error(&self) -> bool {
569 matches!(self, Self::Error(_))
570 }
571
572 pub fn event_id(&self) -> Option<&str> {
574 match self {
575 Self::SessionCreated(e) => Some(&e.event_id),
576 Self::SessionUpdated(e) => Some(&e.event_id),
577 Self::ConversationCreated(e) => Some(&e.event_id),
578 Self::ConversationItemCreated(e) => Some(&e.event_id),
579 Self::ConversationItemRetrieved(e) => Some(&e.event_id),
580 Self::ConversationItemDeleted(e) => Some(&e.event_id),
581 Self::ConversationItemTruncated(e) => Some(&e.event_id),
582 Self::InputAudioTranscriptionCompleted(e) => Some(&e.event_id),
583 Self::InputAudioTranscriptionFailed(e) => Some(&e.event_id),
584 Self::InputAudioBufferCommitted(e) => Some(&e.event_id),
585 Self::InputAudioBufferCleared(e) => Some(&e.event_id),
586 Self::InputAudioBufferSpeechStarted(e) => Some(&e.event_id),
587 Self::InputAudioBufferSpeechStopped(e) => Some(&e.event_id),
588 Self::OutputAudioBufferStarted(e) => Some(&e.event_id),
589 Self::OutputAudioBufferStopped(e) => Some(&e.event_id),
590 Self::OutputAudioBufferCleared(e) => Some(&e.event_id),
591 Self::ResponseCreated(e) => Some(&e.event_id),
592 Self::ResponseDone(e) => Some(&e.event_id),
593 Self::ResponseOutputItemAdded(e) => Some(&e.event_id),
594 Self::ResponseOutputItemDone(e) => Some(&e.event_id),
595 Self::ResponseContentPartAdded(e) => Some(&e.event_id),
596 Self::ResponseContentPartDone(e) => Some(&e.event_id),
597 Self::ResponseTextDelta(e) => Some(&e.event_id),
598 Self::ResponseTextDone(e) => Some(&e.event_id),
599 Self::ResponseAudioDelta(e) => Some(&e.event_id),
600 Self::ResponseAudioDone(e) => Some(&e.event_id),
601 Self::ResponseAudioTranscriptDelta(e) => Some(&e.event_id),
602 Self::ResponseAudioTranscriptDone(e) => Some(&e.event_id),
603 Self::ResponseFunctionCallArgumentsDelta(e) => Some(&e.event_id),
604 Self::ResponseFunctionCallArgumentsDone(e) => Some(&e.event_id),
605 Self::RateLimitsUpdated(e) => Some(&e.event_id),
606 Self::Error(e) => Some(&e.event_id),
607 }
608 }
609}