1use serde::{Deserialize, Serialize};
14
15use crate::{
16 event_types::{RealtimeClientEvent, RealtimeServerEvent},
17 realtime_conversation::RealtimeConversationItem,
18 realtime_response::{RealtimeResponse, RealtimeResponseCreateParams},
19 realtime_session::{RealtimeSessionCreateRequest, RealtimeTranscriptionSessionCreateRequest},
20};
21
22#[serde_with::skip_serializing_none]
34#[derive(Debug, Clone, Serialize, Deserialize)]
35#[serde(tag = "type")]
36pub enum ClientEvent {
37 #[serde(rename = "session.update")]
40 SessionUpdate {
41 session: Box<SessionConfig>,
42 event_id: Option<String>,
43 },
44
45 #[serde(rename = "conversation.item.create")]
48 ConversationItemCreate {
49 item: RealtimeConversationItem,
50 event_id: Option<String>,
51 previous_item_id: Option<String>,
52 },
53
54 #[serde(rename = "conversation.item.delete")]
56 ConversationItemDelete {
57 item_id: String,
58 event_id: Option<String>,
59 },
60
61 #[serde(rename = "conversation.item.retrieve")]
63 ConversationItemRetrieve {
64 item_id: String,
65 event_id: Option<String>,
66 },
67
68 #[serde(rename = "conversation.item.truncate")]
70 ConversationItemTruncate {
71 audio_end_ms: u32,
72 content_index: u32,
73 item_id: String,
74 event_id: Option<String>,
75 },
76
77 #[serde(rename = "input_audio_buffer.append")]
84 InputAudioBufferAppend {
85 audio: String,
86 event_id: Option<String>,
87 },
88
89 #[serde(rename = "input_audio_buffer.clear")]
91 InputAudioBufferClear { event_id: Option<String> },
92
93 #[serde(rename = "input_audio_buffer.commit")]
95 InputAudioBufferCommit { event_id: Option<String> },
96
97 #[serde(rename = "output_audio_buffer.clear")]
100 OutputAudioBufferClear { event_id: Option<String> },
101
102 #[serde(rename = "response.cancel")]
105 ResponseCancel {
106 event_id: Option<String>,
107 response_id: Option<String>,
108 },
109
110 #[serde(rename = "response.create")]
112 ResponseCreate {
113 event_id: Option<String>,
114 response: Option<Box<RealtimeResponseCreateParams>>,
115 },
116
117 #[serde(other)]
122 Unknown,
123}
124
125impl ClientEvent {
126 pub fn event_type(&self) -> &str {
130 self.to_event_type()
131 .map(|e| e.as_str())
132 .unwrap_or("unknown")
133 }
134
135 pub fn to_event_type(&self) -> Option<RealtimeClientEvent> {
139 match self {
140 ClientEvent::SessionUpdate { .. } => Some(RealtimeClientEvent::SessionUpdate),
141 ClientEvent::ConversationItemCreate { .. } => {
142 Some(RealtimeClientEvent::ConversationItemCreate)
143 }
144 ClientEvent::ConversationItemDelete { .. } => {
145 Some(RealtimeClientEvent::ConversationItemDelete)
146 }
147 ClientEvent::ConversationItemRetrieve { .. } => {
148 Some(RealtimeClientEvent::ConversationItemRetrieve)
149 }
150 ClientEvent::ConversationItemTruncate { .. } => {
151 Some(RealtimeClientEvent::ConversationItemTruncate)
152 }
153 ClientEvent::InputAudioBufferAppend { .. } => {
154 Some(RealtimeClientEvent::InputAudioBufferAppend)
155 }
156 ClientEvent::InputAudioBufferClear { .. } => {
157 Some(RealtimeClientEvent::InputAudioBufferClear)
158 }
159 ClientEvent::InputAudioBufferCommit { .. } => {
160 Some(RealtimeClientEvent::InputAudioBufferCommit)
161 }
162 ClientEvent::OutputAudioBufferClear { .. } => {
163 Some(RealtimeClientEvent::OutputAudioBufferClear)
164 }
165 ClientEvent::ResponseCancel { .. } => Some(RealtimeClientEvent::ResponseCancel),
166 ClientEvent::ResponseCreate { .. } => Some(RealtimeClientEvent::ResponseCreate),
167 ClientEvent::Unknown => None,
168 }
169 }
170}
171
172#[serde_with::skip_serializing_none]
184#[derive(Debug, Clone, Serialize, Deserialize)]
185#[serde(tag = "type")]
186pub enum ServerEvent {
187 #[serde(rename = "session.created")]
190 SessionCreated {
191 event_id: String,
192 session: Box<SessionConfig>,
193 },
194
195 #[serde(rename = "session.updated")]
197 SessionUpdated {
198 event_id: String,
199 session: Box<SessionConfig>,
200 },
201
202 #[serde(rename = "conversation.created")]
205 ConversationCreated {
206 conversation: Conversation,
207 event_id: String,
208 },
209
210 #[serde(rename = "conversation.item.created")]
212 ConversationItemCreated {
213 event_id: String,
214 item: RealtimeConversationItem,
215 previous_item_id: Option<String>,
216 },
217
218 #[serde(rename = "conversation.item.added")]
220 ConversationItemAdded {
221 event_id: String,
222 item: RealtimeConversationItem,
223 previous_item_id: Option<String>,
224 },
225
226 #[serde(rename = "conversation.item.done")]
228 ConversationItemDone {
229 event_id: String,
230 item: RealtimeConversationItem,
231 previous_item_id: Option<String>,
232 },
233
234 #[serde(rename = "conversation.item.deleted")]
236 ConversationItemDeleted { event_id: String, item_id: String },
237
238 #[serde(rename = "conversation.item.retrieved")]
240 ConversationItemRetrieved {
241 event_id: String,
242 item: RealtimeConversationItem,
243 },
244
245 #[serde(rename = "conversation.item.truncated")]
247 ConversationItemTruncated {
248 audio_end_ms: u32,
249 content_index: u32,
250 event_id: String,
251 item_id: String,
252 },
253
254 #[serde(rename = "conversation.item.input_audio_transcription.completed")]
257 InputAudioTranscriptionCompleted {
258 content_index: u32,
259 event_id: String,
260 item_id: String,
261 transcript: String,
262 usage: TranscriptionUsage,
263 logprobs: Option<Vec<LogProbProperties>>,
264 },
265
266 #[serde(rename = "conversation.item.input_audio_transcription.delta")]
268 InputAudioTranscriptionDelta {
269 event_id: String,
270 item_id: String,
271 content_index: Option<u32>,
272 delta: Option<String>,
273 logprobs: Option<Vec<LogProbProperties>>,
274 },
275
276 #[serde(rename = "conversation.item.input_audio_transcription.failed")]
278 InputAudioTranscriptionFailed {
279 content_index: u32,
280 error: TranscriptionError,
281 event_id: String,
282 item_id: String,
283 },
284
285 #[serde(rename = "conversation.item.input_audio_transcription.segment")]
288 InputAudioTranscriptionSegment {
289 id: String,
290 content_index: u32,
291 end: f32,
292 event_id: String,
293 item_id: String,
294 speaker: String,
295 start: f32,
296 text: String,
297 },
298
299 #[serde(rename = "input_audio_buffer.cleared")]
302 InputAudioBufferCleared { event_id: String },
303
304 #[serde(rename = "input_audio_buffer.committed")]
306 InputAudioBufferCommitted {
307 event_id: String,
308 item_id: String,
309 previous_item_id: Option<String>,
310 },
311
312 #[serde(rename = "input_audio_buffer.speech_started")]
314 InputAudioBufferSpeechStarted {
315 audio_start_ms: u32,
316 event_id: String,
317 item_id: String,
318 },
319
320 #[serde(rename = "input_audio_buffer.speech_stopped")]
322 InputAudioBufferSpeechStopped {
323 audio_end_ms: u32,
324 event_id: String,
325 item_id: String,
326 },
327
328 #[serde(rename = "input_audio_buffer.timeout_triggered")]
330 InputAudioBufferTimeoutTriggered {
331 audio_end_ms: u32,
332 audio_start_ms: u32,
333 event_id: String,
334 item_id: String,
335 },
336
337 #[serde(rename = "input_audio_buffer.dtmf_event_received")]
343 InputAudioBufferDtmfEventReceived { event: String, received_at: i64 },
344
345 #[serde(rename = "output_audio_buffer.started")]
348 OutputAudioBufferStarted {
349 event_id: String,
350 response_id: String,
351 },
352
353 #[serde(rename = "output_audio_buffer.stopped")]
355 OutputAudioBufferStopped {
356 event_id: String,
357 response_id: String,
358 },
359
360 #[serde(rename = "output_audio_buffer.cleared")]
363 OutputAudioBufferCleared {
364 event_id: String,
365 response_id: String,
366 },
367
368 #[serde(rename = "response.created")]
371 ResponseCreated {
372 event_id: String,
373 response: Box<RealtimeResponse>,
374 },
375
376 #[serde(rename = "response.done")]
378 ResponseDone {
379 event_id: String,
380 response: Box<RealtimeResponse>,
381 },
382
383 #[serde(rename = "response.output_item.added")]
386 ResponseOutputItemAdded {
387 event_id: String,
388 item: RealtimeConversationItem,
389 output_index: u32,
390 response_id: String,
391 },
392
393 #[serde(rename = "response.output_item.done")]
395 ResponseOutputItemDone {
396 event_id: String,
397 item: RealtimeConversationItem,
398 output_index: u32,
399 response_id: String,
400 },
401
402 #[serde(rename = "response.content_part.added")]
405 ResponseContentPartAdded {
406 content_index: u32,
407 event_id: String,
408 item_id: String,
409 output_index: u32,
410 part: ResponseContentPart,
411 response_id: String,
412 },
413
414 #[serde(rename = "response.content_part.done")]
416 ResponseContentPartDone {
417 content_index: u32,
418 event_id: String,
419 item_id: String,
420 output_index: u32,
421 part: ResponseContentPart,
422 response_id: String,
423 },
424
425 #[serde(rename = "response.output_text.delta")]
428 ResponseOutputTextDelta {
429 content_index: u32,
430 delta: String,
431 event_id: String,
432 item_id: String,
433 output_index: u32,
434 response_id: String,
435 },
436
437 #[serde(rename = "response.output_text.done")]
439 ResponseOutputTextDone {
440 content_index: u32,
441 event_id: String,
442 item_id: String,
443 output_index: u32,
444 response_id: String,
445 text: String,
446 },
447
448 #[serde(rename = "response.output_audio.delta")]
454 ResponseOutputAudioDelta {
455 content_index: u32,
456 delta: String,
457 event_id: String,
458 item_id: String,
459 output_index: u32,
460 response_id: String,
461 },
462
463 #[serde(rename = "response.output_audio.done")]
465 ResponseOutputAudioDone {
466 content_index: u32,
467 event_id: String,
468 item_id: String,
469 output_index: u32,
470 response_id: String,
471 },
472
473 #[serde(rename = "response.output_audio_transcript.delta")]
476 ResponseOutputAudioTranscriptDelta {
477 content_index: u32,
478 delta: String,
479 event_id: String,
480 item_id: String,
481 output_index: u32,
482 response_id: String,
483 },
484
485 #[serde(rename = "response.output_audio_transcript.done")]
487 ResponseOutputAudioTranscriptDone {
488 content_index: u32,
489 event_id: String,
490 item_id: String,
491 output_index: u32,
492 response_id: String,
493 transcript: String,
494 },
495
496 #[serde(rename = "response.function_call_arguments.delta")]
499 ResponseFunctionCallArgumentsDelta {
500 call_id: String,
501 delta: String,
502 event_id: String,
503 item_id: String,
504 output_index: u32,
505 response_id: String,
506 },
507
508 #[serde(rename = "response.function_call_arguments.done")]
510 ResponseFunctionCallArgumentsDone {
511 arguments: String,
512 call_id: String,
513 event_id: String,
514 item_id: String,
515 name: String,
516 output_index: u32,
517 response_id: String,
518 },
519
520 #[serde(rename = "response.mcp_call_arguments.delta")]
523 ResponseMcpCallArgumentsDelta {
524 delta: String,
525 event_id: String,
526 item_id: String,
527 output_index: u32,
528 response_id: String,
529 obfuscation: Option<String>,
530 },
531
532 #[serde(rename = "response.mcp_call_arguments.done")]
534 ResponseMcpCallArgumentsDone {
535 arguments: String,
536 event_id: String,
537 item_id: String,
538 output_index: u32,
539 response_id: String,
540 },
541
542 #[serde(rename = "response.mcp_call.in_progress")]
544 ResponseMcpCallInProgress {
545 event_id: String,
546 item_id: String,
547 output_index: u32,
548 },
549
550 #[serde(rename = "response.mcp_call.completed")]
552 ResponseMcpCallCompleted {
553 event_id: String,
554 item_id: String,
555 output_index: u32,
556 },
557
558 #[serde(rename = "response.mcp_call.failed")]
560 ResponseMcpCallFailed {
561 event_id: String,
562 item_id: String,
563 output_index: u32,
564 },
565
566 #[serde(rename = "mcp_list_tools.in_progress")]
569 McpListToolsInProgress { event_id: String, item_id: String },
570
571 #[serde(rename = "mcp_list_tools.completed")]
573 McpListToolsCompleted { event_id: String, item_id: String },
574
575 #[serde(rename = "mcp_list_tools.failed")]
577 McpListToolsFailed { event_id: String, item_id: String },
578
579 #[serde(rename = "rate_limits.updated")]
582 RateLimitsUpdated {
583 event_id: String,
584 rate_limits: Vec<RealtimeRateLimit>,
585 },
586
587 #[serde(rename = "error")]
590 Error {
591 error: RealtimeError,
592 event_id: String,
593 },
594
595 #[serde(other)]
600 Unknown,
601}
602
603impl ServerEvent {
604 pub fn event_type(&self) -> &str {
609 self.to_event_type()
610 .map(|e| e.as_str())
611 .unwrap_or("unknown")
612 }
613
614 pub fn to_event_type(&self) -> Option<RealtimeServerEvent> {
618 match self {
619 ServerEvent::SessionCreated { .. } => Some(RealtimeServerEvent::SessionCreated),
620 ServerEvent::SessionUpdated { .. } => Some(RealtimeServerEvent::SessionUpdated),
621 ServerEvent::ConversationCreated { .. } => {
622 Some(RealtimeServerEvent::ConversationCreated)
623 }
624 ServerEvent::ConversationItemCreated { .. } => {
625 Some(RealtimeServerEvent::ConversationItemCreated)
626 }
627 ServerEvent::ConversationItemAdded { .. } => {
628 Some(RealtimeServerEvent::ConversationItemAdded)
629 }
630 ServerEvent::ConversationItemDone { .. } => {
631 Some(RealtimeServerEvent::ConversationItemDone)
632 }
633 ServerEvent::ConversationItemDeleted { .. } => {
634 Some(RealtimeServerEvent::ConversationItemDeleted)
635 }
636 ServerEvent::ConversationItemRetrieved { .. } => {
637 Some(RealtimeServerEvent::ConversationItemRetrieved)
638 }
639 ServerEvent::ConversationItemTruncated { .. } => {
640 Some(RealtimeServerEvent::ConversationItemTruncated)
641 }
642 ServerEvent::InputAudioTranscriptionCompleted { .. } => {
643 Some(RealtimeServerEvent::ConversationItemInputAudioTranscriptionCompleted)
644 }
645 ServerEvent::InputAudioTranscriptionDelta { .. } => {
646 Some(RealtimeServerEvent::ConversationItemInputAudioTranscriptionDelta)
647 }
648 ServerEvent::InputAudioTranscriptionFailed { .. } => {
649 Some(RealtimeServerEvent::ConversationItemInputAudioTranscriptionFailed)
650 }
651 ServerEvent::InputAudioTranscriptionSegment { .. } => {
652 Some(RealtimeServerEvent::ConversationItemInputAudioTranscriptionSegment)
653 }
654 ServerEvent::InputAudioBufferCleared { .. } => {
655 Some(RealtimeServerEvent::InputAudioBufferCleared)
656 }
657 ServerEvent::InputAudioBufferCommitted { .. } => {
658 Some(RealtimeServerEvent::InputAudioBufferCommitted)
659 }
660 ServerEvent::InputAudioBufferSpeechStarted { .. } => {
661 Some(RealtimeServerEvent::InputAudioBufferSpeechStarted)
662 }
663 ServerEvent::InputAudioBufferSpeechStopped { .. } => {
664 Some(RealtimeServerEvent::InputAudioBufferSpeechStopped)
665 }
666 ServerEvent::InputAudioBufferTimeoutTriggered { .. } => {
667 Some(RealtimeServerEvent::InputAudioBufferTimeoutTriggered)
668 }
669 ServerEvent::InputAudioBufferDtmfEventReceived { .. } => {
670 Some(RealtimeServerEvent::InputAudioBufferDtmfEventReceived)
671 }
672 ServerEvent::OutputAudioBufferStarted { .. } => {
673 Some(RealtimeServerEvent::OutputAudioBufferStarted)
674 }
675 ServerEvent::OutputAudioBufferStopped { .. } => {
676 Some(RealtimeServerEvent::OutputAudioBufferStopped)
677 }
678 ServerEvent::OutputAudioBufferCleared { .. } => {
679 Some(RealtimeServerEvent::OutputAudioBufferCleared)
680 }
681 ServerEvent::ResponseCreated { .. } => Some(RealtimeServerEvent::ResponseCreated),
682 ServerEvent::ResponseDone { .. } => Some(RealtimeServerEvent::ResponseDone),
683 ServerEvent::ResponseOutputItemAdded { .. } => {
684 Some(RealtimeServerEvent::ResponseOutputItemAdded)
685 }
686 ServerEvent::ResponseOutputItemDone { .. } => {
687 Some(RealtimeServerEvent::ResponseOutputItemDone)
688 }
689 ServerEvent::ResponseContentPartAdded { .. } => {
690 Some(RealtimeServerEvent::ResponseContentPartAdded)
691 }
692 ServerEvent::ResponseContentPartDone { .. } => {
693 Some(RealtimeServerEvent::ResponseContentPartDone)
694 }
695 ServerEvent::ResponseOutputTextDelta { .. } => {
696 Some(RealtimeServerEvent::ResponseOutputTextDelta)
697 }
698 ServerEvent::ResponseOutputTextDone { .. } => {
699 Some(RealtimeServerEvent::ResponseOutputTextDone)
700 }
701 ServerEvent::ResponseOutputAudioDelta { .. } => {
702 Some(RealtimeServerEvent::ResponseOutputAudioDelta)
703 }
704 ServerEvent::ResponseOutputAudioDone { .. } => {
705 Some(RealtimeServerEvent::ResponseOutputAudioDone)
706 }
707 ServerEvent::ResponseOutputAudioTranscriptDelta { .. } => {
708 Some(RealtimeServerEvent::ResponseOutputAudioTranscriptDelta)
709 }
710 ServerEvent::ResponseOutputAudioTranscriptDone { .. } => {
711 Some(RealtimeServerEvent::ResponseOutputAudioTranscriptDone)
712 }
713 ServerEvent::ResponseFunctionCallArgumentsDelta { .. } => {
714 Some(RealtimeServerEvent::ResponseFunctionCallArgumentsDelta)
715 }
716 ServerEvent::ResponseFunctionCallArgumentsDone { .. } => {
717 Some(RealtimeServerEvent::ResponseFunctionCallArgumentsDone)
718 }
719 ServerEvent::ResponseMcpCallArgumentsDelta { .. } => {
720 Some(RealtimeServerEvent::ResponseMcpCallArgumentsDelta)
721 }
722 ServerEvent::ResponseMcpCallArgumentsDone { .. } => {
723 Some(RealtimeServerEvent::ResponseMcpCallArgumentsDone)
724 }
725 ServerEvent::ResponseMcpCallInProgress { .. } => {
726 Some(RealtimeServerEvent::ResponseMcpCallInProgress)
727 }
728 ServerEvent::ResponseMcpCallCompleted { .. } => {
729 Some(RealtimeServerEvent::ResponseMcpCallCompleted)
730 }
731 ServerEvent::ResponseMcpCallFailed { .. } => {
732 Some(RealtimeServerEvent::ResponseMcpCallFailed)
733 }
734 ServerEvent::McpListToolsInProgress { .. } => {
735 Some(RealtimeServerEvent::McpListToolsInProgress)
736 }
737 ServerEvent::McpListToolsCompleted { .. } => {
738 Some(RealtimeServerEvent::McpListToolsCompleted)
739 }
740 ServerEvent::McpListToolsFailed { .. } => Some(RealtimeServerEvent::McpListToolsFailed),
741 ServerEvent::RateLimitsUpdated { .. } => Some(RealtimeServerEvent::RateLimitsUpdated),
742 ServerEvent::Error { .. } => Some(RealtimeServerEvent::Error),
743 ServerEvent::Unknown => None,
744 }
745 }
746
747 pub fn is_function_call_done(&self) -> bool {
749 matches!(self, ServerEvent::ResponseFunctionCallArgumentsDone { .. })
750 }
751
752 pub fn get_function_call(&self) -> Option<(&str, &str, &str)> {
754 match self {
755 ServerEvent::ResponseFunctionCallArgumentsDone {
756 call_id,
757 item_id,
758 arguments,
759 ..
760 } => Some((call_id, item_id, arguments)),
761 _ => None,
762 }
763 }
764}
765
766#[derive(Debug, Clone, Serialize, Deserialize)]
775#[serde(tag = "type")]
776pub enum SessionConfig {
777 #[serde(rename = "realtime")]
778 Realtime(Box<RealtimeSessionCreateRequest>),
779 #[serde(rename = "transcription")]
780 Transcription(Box<RealtimeTranscriptionSessionCreateRequest>),
781}
782
783#[serde_with::skip_serializing_none]
789#[derive(Debug, Clone, Serialize, Deserialize)]
790pub struct Conversation {
791 pub id: Option<String>,
792 pub object: Option<String>,
793}
794
795#[serde_with::skip_serializing_none]
797#[derive(Debug, Clone, Serialize, Deserialize)]
798pub struct ResponseContentPart {
799 pub audio: Option<String>,
800 pub text: Option<String>,
801 pub transcript: Option<String>,
802 #[serde(rename = "type")]
803 pub r#type: Option<String>,
804}
805
806#[derive(Debug, Clone, Serialize, Deserialize)]
808pub struct LogProbProperties {
809 pub token: String,
810 pub bytes: Vec<u8>,
813 pub logprob: f64,
814}
815
816#[serde_with::skip_serializing_none]
818#[derive(Debug, Clone, Serialize, Deserialize)]
819pub struct TranscriptionTokenInputDetails {
820 pub audio_tokens: Option<u32>,
821 pub text_tokens: Option<u32>,
822}
823
824#[serde_with::skip_serializing_none]
828#[derive(Debug, Clone, Serialize, Deserialize)]
829#[serde(tag = "type")]
830pub enum TranscriptionUsage {
831 #[serde(rename = "tokens")]
833 Tokens {
834 input_tokens: u32,
835 output_tokens: u32,
836 total_tokens: u32,
837 input_token_details: Option<TranscriptionTokenInputDetails>,
838 },
839 #[serde(rename = "duration")]
841 Duration { seconds: f64 },
842}
843
844#[serde_with::skip_serializing_none]
846#[derive(Debug, Clone, Serialize, Deserialize)]
847pub struct TranscriptionError {
848 pub code: Option<String>,
849 pub message: Option<String>,
850 pub param: Option<String>,
851 #[serde(rename = "type")]
852 pub r#type: Option<String>,
853}
854
855#[serde_with::skip_serializing_none]
857#[derive(Debug, Clone, Serialize, Deserialize)]
858pub struct RealtimeRateLimit {
859 pub limit: Option<u32>,
860 pub name: Option<String>,
861 pub remaining: Option<u32>,
862 pub reset_seconds: Option<f64>,
863}
864
865#[serde_with::skip_serializing_none]
867#[derive(Debug, Clone, Serialize, Deserialize)]
868pub struct RealtimeError {
869 pub message: String,
870 #[serde(rename = "type")]
871 pub r#type: String,
872 pub code: Option<String>,
873 pub event_id: Option<String>,
874 pub param: Option<String>,
875}