use serde::{Deserialize, Serialize};
use crate::{
event_types::{RealtimeClientEvent, RealtimeServerEvent},
realtime_conversation::RealtimeConversationItem,
realtime_response::{RealtimeResponse, RealtimeResponseCreateParams},
realtime_session::{RealtimeSessionCreateRequest, RealtimeTranscriptionSessionCreateRequest},
};
/// Events sent by the client over the realtime connection.
///
/// The enum is internally tagged: the JSON `type` field selects the variant and the
/// variant's own fields sit next to it in the same object. `Option` fields that are
/// `None` are left out when serializing. `event_id` is optional on every variant.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ClientEvent {
/// `session.update`: carries a boxed [`SessionConfig`].
#[serde(rename = "session.update")]
SessionUpdate {
// NOTE(review): boxed presumably to keep the enum small — confirm against the config size.
session: Box<SessionConfig>,
event_id: Option<String>,
},
/// `conversation.item.create`: carries the item plus an optional `previous_item_id`.
#[serde(rename = "conversation.item.create")]
ConversationItemCreate {
item: RealtimeConversationItem,
event_id: Option<String>,
// NOTE(review): presumably the item after which the new one is inserted — confirm.
previous_item_id: Option<String>,
},
/// `conversation.item.delete`: names the target item by `item_id`.
#[serde(rename = "conversation.item.delete")]
ConversationItemDelete {
item_id: String,
event_id: Option<String>,
},
/// `conversation.item.retrieve`: names the target item by `item_id`.
#[serde(rename = "conversation.item.retrieve")]
ConversationItemRetrieve {
item_id: String,
event_id: Option<String>,
},
/// `conversation.item.truncate`: names an item, a content index and an audio end offset.
#[serde(rename = "conversation.item.truncate")]
ConversationItemTruncate {
// NOTE(review): unit is milliseconds going by the field name — confirm.
audio_end_ms: u32,
content_index: u32,
item_id: String,
event_id: Option<String>,
},
/// `input_audio_buffer.append`: carries audio as a string.
#[serde(rename = "input_audio_buffer.append")]
InputAudioBufferAppend {
// NOTE(review): presumably base64-encoded audio bytes — TODO confirm the encoding.
audio: String,
event_id: Option<String>,
},
/// `input_audio_buffer.clear`: no payload besides the optional `event_id`.
#[serde(rename = "input_audio_buffer.clear")]
InputAudioBufferClear { event_id: Option<String> },
/// `input_audio_buffer.commit`: no payload besides the optional `event_id`.
#[serde(rename = "input_audio_buffer.commit")]
InputAudioBufferCommit { event_id: Option<String> },
/// `output_audio_buffer.clear`: no payload besides the optional `event_id`.
#[serde(rename = "output_audio_buffer.clear")]
OutputAudioBufferClear { event_id: Option<String> },
/// `response.cancel`: optionally names the response by `response_id`.
#[serde(rename = "response.cancel")]
ResponseCancel {
event_id: Option<String>,
response_id: Option<String>,
},
/// `response.create`: optionally carries boxed response parameters.
#[serde(rename = "response.create")]
ResponseCreate {
event_id: Option<String>,
response: Option<Box<RealtimeResponseCreateParams>>,
},
/// Fallback chosen when deserializing a `type` value that matches no other variant.
///
/// It is a unit variant, so the original `type` string and payload are not retained.
#[serde(other)]
Unknown,
}
impl ClientEvent {
    /// Returns the string form of this event's type.
    ///
    /// The string comes from `RealtimeClientEvent::as_str` for every typed variant;
    /// `ClientEvent::Unknown` has no typed counterpart and yields `"unknown"`.
    pub fn event_type(&self) -> &str {
        match self.to_event_type() {
            Some(kind) => kind.as_str(),
            None => "unknown",
        }
    }

    /// Maps this event onto its `RealtimeClientEvent` discriminant.
    ///
    /// Returns `None` only for `ClientEvent::Unknown`.
    pub fn to_event_type(&self) -> Option<RealtimeClientEvent> {
        let kind = match self {
            // The fallback variant is the single case without a discriminant.
            Self::Unknown => return None,
            Self::SessionUpdate { .. } => RealtimeClientEvent::SessionUpdate,
            Self::ConversationItemCreate { .. } => RealtimeClientEvent::ConversationItemCreate,
            Self::ConversationItemDelete { .. } => RealtimeClientEvent::ConversationItemDelete,
            Self::ConversationItemRetrieve { .. } => {
                RealtimeClientEvent::ConversationItemRetrieve
            }
            Self::ConversationItemTruncate { .. } => {
                RealtimeClientEvent::ConversationItemTruncate
            }
            Self::InputAudioBufferAppend { .. } => RealtimeClientEvent::InputAudioBufferAppend,
            Self::InputAudioBufferClear { .. } => RealtimeClientEvent::InputAudioBufferClear,
            Self::InputAudioBufferCommit { .. } => RealtimeClientEvent::InputAudioBufferCommit,
            Self::OutputAudioBufferClear { .. } => RealtimeClientEvent::OutputAudioBufferClear,
            Self::ResponseCancel { .. } => RealtimeClientEvent::ResponseCancel,
            Self::ResponseCreate { .. } => RealtimeClientEvent::ResponseCreate,
        };
        Some(kind)
    }
}
/// Events received from the realtime server.
///
/// The enum is internally tagged: the JSON `type` field selects the variant and the
/// variant's own fields sit next to it in the same object. `Option` fields that are
/// `None` are left out when serializing. Every struct variant carries a required
/// `event_id` except `InputAudioBufferDtmfEventReceived`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ServerEvent {
// ---- session.* ----
#[serde(rename = "session.created")]
SessionCreated {
event_id: String,
session: Box<SessionConfig>,
},
#[serde(rename = "session.updated")]
SessionUpdated {
event_id: String,
session: Box<SessionConfig>,
},
// ---- conversation.* ----
#[serde(rename = "conversation.created")]
ConversationCreated {
conversation: Conversation,
event_id: String,
},
#[serde(rename = "conversation.item.created")]
ConversationItemCreated {
event_id: String,
item: RealtimeConversationItem,
previous_item_id: Option<String>,
},
#[serde(rename = "conversation.item.added")]
ConversationItemAdded {
event_id: String,
item: RealtimeConversationItem,
previous_item_id: Option<String>,
},
#[serde(rename = "conversation.item.done")]
ConversationItemDone {
event_id: String,
item: RealtimeConversationItem,
previous_item_id: Option<String>,
},
#[serde(rename = "conversation.item.deleted")]
ConversationItemDeleted { event_id: String, item_id: String },
#[serde(rename = "conversation.item.retrieved")]
ConversationItemRetrieved {
event_id: String,
item: RealtimeConversationItem,
},
#[serde(rename = "conversation.item.truncated")]
ConversationItemTruncated {
audio_end_ms: u32,
content_index: u32,
event_id: String,
item_id: String,
},
// ---- conversation.item.input_audio_transcription.* ----
/// Carries the final transcript, its usage accounting and optional log-probabilities.
#[serde(rename = "conversation.item.input_audio_transcription.completed")]
InputAudioTranscriptionCompleted {
content_index: u32,
event_id: String,
item_id: String,
transcript: String,
usage: TranscriptionUsage,
logprobs: Option<Vec<LogProbProperties>>,
},
/// Unlike the sibling transcription variants, `content_index` is optional here,
/// and so is `delta`.
#[serde(rename = "conversation.item.input_audio_transcription.delta")]
InputAudioTranscriptionDelta {
event_id: String,
item_id: String,
content_index: Option<u32>,
delta: Option<String>,
logprobs: Option<Vec<LogProbProperties>>,
},
#[serde(rename = "conversation.item.input_audio_transcription.failed")]
InputAudioTranscriptionFailed {
content_index: u32,
error: TranscriptionError,
event_id: String,
item_id: String,
},
#[serde(rename = "conversation.item.input_audio_transcription.segment")]
InputAudioTranscriptionSegment {
id: String,
content_index: u32,
// NOTE(review): `start`/`end` are presumably offsets in seconds — TODO confirm the unit.
end: f32,
event_id: String,
item_id: String,
speaker: String,
start: f32,
text: String,
},
// ---- input_audio_buffer.* ----
#[serde(rename = "input_audio_buffer.cleared")]
InputAudioBufferCleared { event_id: String },
#[serde(rename = "input_audio_buffer.committed")]
InputAudioBufferCommitted {
event_id: String,
item_id: String,
previous_item_id: Option<String>,
},
#[serde(rename = "input_audio_buffer.speech_started")]
InputAudioBufferSpeechStarted {
audio_start_ms: u32,
event_id: String,
item_id: String,
},
#[serde(rename = "input_audio_buffer.speech_stopped")]
InputAudioBufferSpeechStopped {
audio_end_ms: u32,
event_id: String,
item_id: String,
},
#[serde(rename = "input_audio_buffer.timeout_triggered")]
InputAudioBufferTimeoutTriggered {
audio_end_ms: u32,
audio_start_ms: u32,
event_id: String,
item_id: String,
},
/// The only struct variant without an `event_id` field.
// NOTE(review): `received_at` is presumably a timestamp; epoch and unit are not visible here.
#[serde(rename = "input_audio_buffer.dtmf_event_received")]
InputAudioBufferDtmfEventReceived { event: String, received_at: i64 },
// ---- output_audio_buffer.* ----
#[serde(rename = "output_audio_buffer.started")]
OutputAudioBufferStarted {
event_id: String,
response_id: String,
},
#[serde(rename = "output_audio_buffer.stopped")]
OutputAudioBufferStopped {
event_id: String,
response_id: String,
},
#[serde(rename = "output_audio_buffer.cleared")]
OutputAudioBufferCleared {
event_id: String,
response_id: String,
},
// ---- response.* ----
#[serde(rename = "response.created")]
ResponseCreated {
event_id: String,
response: Box<RealtimeResponse>,
},
#[serde(rename = "response.done")]
ResponseDone {
event_id: String,
response: Box<RealtimeResponse>,
},
#[serde(rename = "response.output_item.added")]
ResponseOutputItemAdded {
event_id: String,
item: RealtimeConversationItem,
output_index: u32,
response_id: String,
},
#[serde(rename = "response.output_item.done")]
ResponseOutputItemDone {
event_id: String,
item: RealtimeConversationItem,
output_index: u32,
response_id: String,
},
#[serde(rename = "response.content_part.added")]
ResponseContentPartAdded {
content_index: u32,
event_id: String,
item_id: String,
output_index: u32,
part: ResponseContentPart,
response_id: String,
},
#[serde(rename = "response.content_part.done")]
ResponseContentPartDone {
content_index: u32,
event_id: String,
item_id: String,
output_index: u32,
part: ResponseContentPart,
response_id: String,
},
#[serde(rename = "response.output_text.delta")]
ResponseOutputTextDelta {
content_index: u32,
delta: String,
event_id: String,
item_id: String,
output_index: u32,
response_id: String,
},
#[serde(rename = "response.output_text.done")]
ResponseOutputTextDone {
content_index: u32,
event_id: String,
item_id: String,
output_index: u32,
response_id: String,
text: String,
},
#[serde(rename = "response.output_audio.delta")]
ResponseOutputAudioDelta {
content_index: u32,
// NOTE(review): presumably base64-encoded audio — TODO confirm the encoding.
delta: String,
event_id: String,
item_id: String,
output_index: u32,
response_id: String,
},
#[serde(rename = "response.output_audio.done")]
ResponseOutputAudioDone {
content_index: u32,
event_id: String,
item_id: String,
output_index: u32,
response_id: String,
},
#[serde(rename = "response.output_audio_transcript.delta")]
ResponseOutputAudioTranscriptDelta {
content_index: u32,
delta: String,
event_id: String,
item_id: String,
output_index: u32,
response_id: String,
},
#[serde(rename = "response.output_audio_transcript.done")]
ResponseOutputAudioTranscriptDone {
content_index: u32,
event_id: String,
item_id: String,
output_index: u32,
response_id: String,
transcript: String,
},
#[serde(rename = "response.function_call_arguments.delta")]
ResponseFunctionCallArgumentsDelta {
call_id: String,
delta: String,
event_id: String,
item_id: String,
output_index: u32,
response_id: String,
},
/// Carries the complete `arguments` string together with the call's `name` and ids.
#[serde(rename = "response.function_call_arguments.done")]
ResponseFunctionCallArgumentsDone {
arguments: String,
call_id: String,
event_id: String,
item_id: String,
name: String,
output_index: u32,
response_id: String,
},
#[serde(rename = "response.mcp_call_arguments.delta")]
ResponseMcpCallArgumentsDelta {
delta: String,
event_id: String,
item_id: String,
output_index: u32,
response_id: String,
obfuscation: Option<String>,
},
#[serde(rename = "response.mcp_call_arguments.done")]
ResponseMcpCallArgumentsDone {
arguments: String,
event_id: String,
item_id: String,
output_index: u32,
response_id: String,
},
#[serde(rename = "response.mcp_call.in_progress")]
ResponseMcpCallInProgress {
event_id: String,
item_id: String,
output_index: u32,
},
#[serde(rename = "response.mcp_call.completed")]
ResponseMcpCallCompleted {
event_id: String,
item_id: String,
output_index: u32,
},
#[serde(rename = "response.mcp_call.failed")]
ResponseMcpCallFailed {
event_id: String,
item_id: String,
output_index: u32,
},
// ---- mcp_list_tools.* ----
#[serde(rename = "mcp_list_tools.in_progress")]
McpListToolsInProgress { event_id: String, item_id: String },
#[serde(rename = "mcp_list_tools.completed")]
McpListToolsCompleted { event_id: String, item_id: String },
#[serde(rename = "mcp_list_tools.failed")]
McpListToolsFailed { event_id: String, item_id: String },
// ---- rate_limits.* and error ----
#[serde(rename = "rate_limits.updated")]
RateLimitsUpdated {
event_id: String,
rate_limits: Vec<RealtimeRateLimit>,
},
/// `event_id` here identifies this server event; `RealtimeError` has its own
/// optional `event_id` field as well.
#[serde(rename = "error")]
Error {
error: RealtimeError,
event_id: String,
},
/// Fallback chosen when deserializing a `type` value that matches no other variant.
///
/// It is a unit variant, so the original `type` string and payload are not retained.
#[serde(other)]
Unknown,
}
impl ServerEvent {
    /// Returns the string form of this event's type.
    ///
    /// The string comes from `RealtimeServerEvent::as_str` for every typed variant;
    /// `ServerEvent::Unknown` has no typed counterpart and yields `"unknown"`.
    pub fn event_type(&self) -> &str {
        match self.to_event_type() {
            Some(kind) => kind.as_str(),
            None => "unknown",
        }
    }

    /// Maps this event onto its `RealtimeServerEvent` discriminant.
    ///
    /// Returns `None` only for `ServerEvent::Unknown`.
    pub fn to_event_type(&self) -> Option<RealtimeServerEvent> {
        let kind = match self {
            // The fallback variant is the single case without a discriminant.
            Self::Unknown => return None,

            // session.*
            Self::SessionCreated { .. } => RealtimeServerEvent::SessionCreated,
            Self::SessionUpdated { .. } => RealtimeServerEvent::SessionUpdated,

            // conversation.*
            Self::ConversationCreated { .. } => RealtimeServerEvent::ConversationCreated,
            Self::ConversationItemCreated { .. } => RealtimeServerEvent::ConversationItemCreated,
            Self::ConversationItemAdded { .. } => RealtimeServerEvent::ConversationItemAdded,
            Self::ConversationItemDone { .. } => RealtimeServerEvent::ConversationItemDone,
            Self::ConversationItemDeleted { .. } => RealtimeServerEvent::ConversationItemDeleted,
            Self::ConversationItemRetrieved { .. } => {
                RealtimeServerEvent::ConversationItemRetrieved
            }
            Self::ConversationItemTruncated { .. } => {
                RealtimeServerEvent::ConversationItemTruncated
            }

            // conversation.item.input_audio_transcription.*
            Self::InputAudioTranscriptionCompleted { .. } => {
                RealtimeServerEvent::ConversationItemInputAudioTranscriptionCompleted
            }
            Self::InputAudioTranscriptionDelta { .. } => {
                RealtimeServerEvent::ConversationItemInputAudioTranscriptionDelta
            }
            Self::InputAudioTranscriptionFailed { .. } => {
                RealtimeServerEvent::ConversationItemInputAudioTranscriptionFailed
            }
            Self::InputAudioTranscriptionSegment { .. } => {
                RealtimeServerEvent::ConversationItemInputAudioTranscriptionSegment
            }

            // input_audio_buffer.*
            Self::InputAudioBufferCleared { .. } => RealtimeServerEvent::InputAudioBufferCleared,
            Self::InputAudioBufferCommitted { .. } => {
                RealtimeServerEvent::InputAudioBufferCommitted
            }
            Self::InputAudioBufferSpeechStarted { .. } => {
                RealtimeServerEvent::InputAudioBufferSpeechStarted
            }
            Self::InputAudioBufferSpeechStopped { .. } => {
                RealtimeServerEvent::InputAudioBufferSpeechStopped
            }
            Self::InputAudioBufferTimeoutTriggered { .. } => {
                RealtimeServerEvent::InputAudioBufferTimeoutTriggered
            }
            Self::InputAudioBufferDtmfEventReceived { .. } => {
                RealtimeServerEvent::InputAudioBufferDtmfEventReceived
            }

            // output_audio_buffer.*
            Self::OutputAudioBufferStarted { .. } => RealtimeServerEvent::OutputAudioBufferStarted,
            Self::OutputAudioBufferStopped { .. } => RealtimeServerEvent::OutputAudioBufferStopped,
            Self::OutputAudioBufferCleared { .. } => RealtimeServerEvent::OutputAudioBufferCleared,

            // response.*
            Self::ResponseCreated { .. } => RealtimeServerEvent::ResponseCreated,
            Self::ResponseDone { .. } => RealtimeServerEvent::ResponseDone,
            Self::ResponseOutputItemAdded { .. } => RealtimeServerEvent::ResponseOutputItemAdded,
            Self::ResponseOutputItemDone { .. } => RealtimeServerEvent::ResponseOutputItemDone,
            Self::ResponseContentPartAdded { .. } => RealtimeServerEvent::ResponseContentPartAdded,
            Self::ResponseContentPartDone { .. } => RealtimeServerEvent::ResponseContentPartDone,
            Self::ResponseOutputTextDelta { .. } => RealtimeServerEvent::ResponseOutputTextDelta,
            Self::ResponseOutputTextDone { .. } => RealtimeServerEvent::ResponseOutputTextDone,
            Self::ResponseOutputAudioDelta { .. } => RealtimeServerEvent::ResponseOutputAudioDelta,
            Self::ResponseOutputAudioDone { .. } => RealtimeServerEvent::ResponseOutputAudioDone,
            Self::ResponseOutputAudioTranscriptDelta { .. } => {
                RealtimeServerEvent::ResponseOutputAudioTranscriptDelta
            }
            Self::ResponseOutputAudioTranscriptDone { .. } => {
                RealtimeServerEvent::ResponseOutputAudioTranscriptDone
            }
            Self::ResponseFunctionCallArgumentsDelta { .. } => {
                RealtimeServerEvent::ResponseFunctionCallArgumentsDelta
            }
            Self::ResponseFunctionCallArgumentsDone { .. } => {
                RealtimeServerEvent::ResponseFunctionCallArgumentsDone
            }
            Self::ResponseMcpCallArgumentsDelta { .. } => {
                RealtimeServerEvent::ResponseMcpCallArgumentsDelta
            }
            Self::ResponseMcpCallArgumentsDone { .. } => {
                RealtimeServerEvent::ResponseMcpCallArgumentsDone
            }
            Self::ResponseMcpCallInProgress { .. } => {
                RealtimeServerEvent::ResponseMcpCallInProgress
            }
            Self::ResponseMcpCallCompleted { .. } => RealtimeServerEvent::ResponseMcpCallCompleted,
            Self::ResponseMcpCallFailed { .. } => RealtimeServerEvent::ResponseMcpCallFailed,

            // mcp_list_tools.*
            Self::McpListToolsInProgress { .. } => RealtimeServerEvent::McpListToolsInProgress,
            Self::McpListToolsCompleted { .. } => RealtimeServerEvent::McpListToolsCompleted,
            Self::McpListToolsFailed { .. } => RealtimeServerEvent::McpListToolsFailed,

            // rate_limits.* and error
            Self::RateLimitsUpdated { .. } => RealtimeServerEvent::RateLimitsUpdated,
            Self::Error { .. } => RealtimeServerEvent::Error,
        };
        Some(kind)
    }

    /// Reports whether this is a `response.function_call_arguments.done` event.
    pub fn is_function_call_done(&self) -> bool {
        // `get_function_call` yields `Some` for exactly that one variant.
        self.get_function_call().is_some()
    }

    /// Borrows `(call_id, item_id, arguments)` from a
    /// `response.function_call_arguments.done` event.
    ///
    /// Returns `None` for every other variant. The variant's `name` field is not
    /// part of the returned tuple.
    pub fn get_function_call(&self) -> Option<(&str, &str, &str)> {
        if let Self::ResponseFunctionCallArgumentsDone {
            call_id,
            item_id,
            arguments,
            ..
        } = self
        {
            return Some((call_id.as_str(), item_id.as_str(), arguments.as_str()));
        }
        None
    }
}
/// Session configuration payload, discriminated by the JSON `type` field.
///
/// Internally tagged: `"realtime"` and `"transcription"` select the variant, and the
/// remaining fields of the same object populate the boxed request struct. There is no
/// fallback variant, so any other `type` value fails to deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum SessionConfig {
/// `type == "realtime"`.
#[serde(rename = "realtime")]
Realtime(Box<RealtimeSessionCreateRequest>),
/// `type == "transcription"`.
#[serde(rename = "transcription")]
Transcription(Box<RealtimeTranscriptionSessionCreateRequest>),
}
/// Conversation descriptor carried by `ServerEvent::ConversationCreated`.
///
/// Both fields are optional; `None` values are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Conversation {
pub id: Option<String>,
// NOTE(review): presumably an object-kind marker string — confirm expected value(s).
pub object: Option<String>,
}
/// Content part carried by the `response.content_part.added` / `.done` server events.
///
/// Every field is optional; `None` values are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResponseContentPart {
// NOTE(review): presumably base64-encoded audio — TODO confirm the encoding.
pub audio: Option<String>,
pub text: Option<String>,
pub transcript: Option<String>,
// Raw identifier because `type` is a Rust keyword; the JSON key is `type`.
#[serde(rename = "type")]
pub r#type: Option<String>,
}
/// One log-probability entry attached to input-audio transcription events.
///
/// All three fields are required when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogProbProperties {
pub token: String,
// Each element is a `u8`, so values outside 0..=255 fail to deserialize.
pub bytes: Vec<u8>,
pub logprob: f64,
}
/// Optional breakdown of input tokens, used by `TranscriptionUsage::Tokens`.
///
/// Both counters are optional; `None` values are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TranscriptionTokenInputDetails {
pub audio_tokens: Option<u32>,
pub text_tokens: Option<u32>,
}
/// Usage accounting for a completed transcription, discriminated by the JSON `type` field.
///
/// Internally tagged with the values `"tokens"` and `"duration"`. There is no fallback
/// variant, so any other `type` value fails to deserialize.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum TranscriptionUsage {
/// Token-based accounting; `input_token_details` is omitted from output when `None`.
#[serde(rename = "tokens")]
Tokens {
input_tokens: u32,
output_tokens: u32,
total_tokens: u32,
input_token_details: Option<TranscriptionTokenInputDetails>,
},
/// Duration-based accounting, expressed in seconds.
#[serde(rename = "duration")]
Duration { seconds: f64 },
}
/// Error details carried by `ServerEvent::InputAudioTranscriptionFailed`.
///
/// Every field is optional; `None` values are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TranscriptionError {
pub code: Option<String>,
pub message: Option<String>,
pub param: Option<String>,
// Raw identifier because `type` is a Rust keyword; the JSON key is `type`.
#[serde(rename = "type")]
pub r#type: Option<String>,
}
/// One entry of the `rate_limits` list in `ServerEvent::RateLimitsUpdated`.
///
/// Every field is optional; `None` values are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeRateLimit {
pub limit: Option<u32>,
pub name: Option<String>,
pub remaining: Option<u32>,
// Fractional seconds; NOTE(review): presumably time until the limit resets — confirm.
pub reset_seconds: Option<f64>,
}
/// Error payload carried by `ServerEvent::Error`.
///
/// `message` and `type` are required; the remaining fields are optional and are
/// omitted when serializing if `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeError {
pub message: String,
// Raw identifier because `type` is a Rust keyword; the JSON key is `type`.
#[serde(rename = "type")]
pub r#type: String,
pub code: Option<String>,
// NOTE(review): presumably the id of the client event that caused the error — confirm.
pub event_id: Option<String>,
pub param: Option<String>,
}