use serde::{Deserialize, Serialize};
use crate::realtime::audio::{AudioFormat, Voice};
use crate::realtime::conversation::ItemStatus;
use crate::realtime::session::{MaxTokens, Modality, RealtimeTool, ToolChoice};
use crate::realtime::vad::TurnDetection;
/// Events received from the realtime server, selected by the JSON `type` field.
///
/// The enum is internally tagged (`#[serde(tag = "type")]`): the value of `type`
/// picks the variant, and the other fields of the same JSON object are
/// deserialized into that variant's payload struct. There is no catch-all
/// variant, so a `type` string that is not listed below fails deserialization.
/// Only `Deserialize` is derived, so this type cannot be serialized.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type")]
pub enum ServerEvent {
// --- session.* ---
#[serde(rename = "session.created")]
SessionCreated(SessionCreatedEvent),
#[serde(rename = "session.updated")]
SessionUpdated(SessionUpdatedEvent),
// --- conversation.* ---
#[serde(rename = "conversation.created")]
ConversationCreated(ConversationCreatedEvent),
#[serde(rename = "conversation.item.created")]
ConversationItemCreated(ConversationItemCreatedEvent),
#[serde(rename = "conversation.item.retrieved")]
ConversationItemRetrieved(ConversationItemRetrievedEvent),
#[serde(rename = "conversation.item.deleted")]
ConversationItemDeleted(ConversationItemDeletedEvent),
#[serde(rename = "conversation.item.truncated")]
ConversationItemTruncated(ConversationItemTruncatedEvent),
#[serde(rename = "conversation.item.input_audio_transcription.completed")]
InputAudioTranscriptionCompleted(InputAudioTranscriptionCompletedEvent),
#[serde(rename = "conversation.item.input_audio_transcription.failed")]
InputAudioTranscriptionFailed(InputAudioTranscriptionFailedEvent),
// --- input_audio_buffer.* ---
#[serde(rename = "input_audio_buffer.committed")]
InputAudioBufferCommitted(InputAudioBufferCommittedEvent),
#[serde(rename = "input_audio_buffer.cleared")]
InputAudioBufferCleared(InputAudioBufferClearedEvent),
#[serde(rename = "input_audio_buffer.speech_started")]
InputAudioBufferSpeechStarted(SpeechStartedEvent),
#[serde(rename = "input_audio_buffer.speech_stopped")]
InputAudioBufferSpeechStopped(SpeechStoppedEvent),
// --- output_audio_buffer.* ---
// `started` and `cleared` share one payload type; `stopped` has its own.
#[serde(rename = "output_audio_buffer.started")]
OutputAudioBufferStarted(OutputAudioBufferEvent),
#[serde(rename = "output_audio_buffer.stopped")]
OutputAudioBufferStopped(OutputAudioBufferStoppedEvent),
#[serde(rename = "output_audio_buffer.cleared")]
OutputAudioBufferCleared(OutputAudioBufferEvent),
// --- response.* ---
// Each `added`/`done` pair for output items and content parts shares a payload type.
#[serde(rename = "response.created")]
ResponseCreated(ResponseCreatedEvent),
#[serde(rename = "response.done")]
ResponseDone(ResponseDoneEvent),
#[serde(rename = "response.output_item.added")]
ResponseOutputItemAdded(ResponseOutputItemEvent),
#[serde(rename = "response.output_item.done")]
ResponseOutputItemDone(ResponseOutputItemEvent),
#[serde(rename = "response.content_part.added")]
ResponseContentPartAdded(ResponseContentPartEvent),
#[serde(rename = "response.content_part.done")]
ResponseContentPartDone(ResponseContentPartEvent),
#[serde(rename = "response.text.delta")]
ResponseTextDelta(ResponseTextDeltaEvent),
#[serde(rename = "response.text.done")]
ResponseTextDone(ResponseTextDoneEvent),
#[serde(rename = "response.audio.delta")]
ResponseAudioDelta(ResponseAudioDeltaEvent),
#[serde(rename = "response.audio.done")]
ResponseAudioDone(ResponseAudioDoneEvent),
#[serde(rename = "response.audio_transcript.delta")]
ResponseAudioTranscriptDelta(ResponseAudioTranscriptDeltaEvent),
#[serde(rename = "response.audio_transcript.done")]
ResponseAudioTranscriptDone(ResponseAudioTranscriptDoneEvent),
#[serde(rename = "response.function_call_arguments.delta")]
ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent),
#[serde(rename = "response.function_call_arguments.done")]
ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent),
// --- miscellaneous ---
#[serde(rename = "rate_limits.updated")]
RateLimitsUpdated(RateLimitsUpdatedEvent),
#[serde(rename = "error")]
Error(ErrorEvent),
}
/// Payload of [`ServerEvent::SessionCreated`] (wire type `session.created`).
///
/// Both fields are required; a missing one is a deserialization error.
#[derive(Debug, Clone, Deserialize)]
pub struct SessionCreatedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Session description carried by the event.
pub session: SessionInfo,
}
/// Payload of [`ServerEvent::SessionUpdated`] (wire type `session.updated`).
///
/// Has the same shape as [`SessionCreatedEvent`]; both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct SessionUpdatedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Session description carried by the event.
pub session: SessionInfo,
}
/// Session description embedded in [`SessionCreatedEvent`] and [`SessionUpdatedEvent`].
///
/// `id`, `object` and `model` are required. Fields marked `#[serde(default)]`
/// fall back to their `Default` value when the key is absent, and `Option`
/// fields become `None` when the key is absent or `null`.
#[derive(Debug, Clone, Deserialize)]
pub struct SessionInfo {
/// Session identifier as sent by the server.
pub id: String,
/// Value of the `object` field (kept as a free-form string).
pub object: String,
/// Value of the `model` field.
pub model: String,
/// Modalities listed for the session; empty when the key is absent.
#[serde(default)]
pub modalities: Vec<Modality>,
/// Instructions text; empty string when the key is absent.
// NOTE(review): `default` only covers a missing key — an explicit JSON `null`
// would presumably still fail for a `String`; confirm the server never sends it.
#[serde(default)]
pub instructions: String,
/// Voice setting, if present.
pub voice: Option<Voice>,
/// Input audio format, if present.
pub input_audio_format: Option<AudioFormat>,
/// Output audio format, if present.
pub output_audio_format: Option<AudioFormat>,
/// Turn-detection configuration, if present.
pub turn_detection: Option<TurnDetection>,
/// Tools listed for the session; empty when the key is absent.
#[serde(default)]
pub tools: Vec<RealtimeTool>,
/// Tool-choice setting, if present.
pub tool_choice: Option<ToolChoice>,
/// Temperature setting, if present.
pub temperature: Option<f32>,
/// Output-token limit setting, if present.
pub max_response_output_tokens: Option<MaxTokens>,
}
/// Payload of [`ServerEvent::ConversationCreated`] (wire type `conversation.created`).
///
/// Both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationCreatedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Conversation description carried by the event.
pub conversation: ConversationInfo,
}
/// Conversation description embedded in [`ConversationCreatedEvent`].
///
/// Both fields are required strings.
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationInfo {
/// Conversation identifier as sent by the server.
pub id: String,
/// Value of the `object` field (kept as a free-form string).
pub object: String,
}
/// Payload of [`ServerEvent::ConversationItemCreated`]
/// (wire type `conversation.item.created`).
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationItemCreatedEvent {
/// Value of the event's `event_id` field (required).
pub event_id: String,
/// Value of `previous_item_id`; `None` when the key is absent or `null`.
#[serde(default)]
pub previous_item_id: Option<String>,
/// The item carried by the event (required).
pub item: ResponseItem,
}
/// Payload of [`ServerEvent::ConversationItemRetrieved`]
/// (wire type `conversation.item.retrieved`).
///
/// Both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationItemRetrievedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// The item carried by the event.
pub item: ResponseItem,
}
/// Payload of [`ServerEvent::ConversationItemDeleted`]
/// (wire type `conversation.item.deleted`).
///
/// Both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationItemDeletedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
}
/// Payload of [`ServerEvent::ConversationItemTruncated`]
/// (wire type `conversation.item.truncated`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ConversationItemTruncatedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
/// Value of `audio_end_ms` — presumably milliseconds, going by the suffix;
/// confirm against the API reference.
pub audio_end_ms: u32,
}
/// Payload of [`ServerEvent::InputAudioTranscriptionCompleted`]
/// (wire type `conversation.item.input_audio_transcription.completed`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct InputAudioTranscriptionCompletedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
/// Transcript text carried by the event.
pub transcript: String,
}
/// Payload of [`ServerEvent::InputAudioTranscriptionFailed`]
/// (wire type `conversation.item.input_audio_transcription.failed`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct InputAudioTranscriptionFailedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
/// Error details, using the same shape as [`ErrorEvent::error`].
pub error: RealtimeError,
}
/// Payload of [`ServerEvent::InputAudioBufferCommitted`]
/// (wire type `input_audio_buffer.committed`).
#[derive(Debug, Clone, Deserialize)]
pub struct InputAudioBufferCommittedEvent {
/// Value of the event's `event_id` field (required).
pub event_id: String,
/// Value of `previous_item_id`; `None` when the key is absent or `null`.
#[serde(default)]
pub previous_item_id: Option<String>,
/// Value of the event's `item_id` field (required).
pub item_id: String,
}
/// Payload of [`ServerEvent::InputAudioBufferCleared`]
/// (wire type `input_audio_buffer.cleared`); carries only the event id.
#[derive(Debug, Clone, Deserialize)]
pub struct InputAudioBufferClearedEvent {
/// Value of the event's `event_id` field (required).
pub event_id: String,
}
/// Payload of [`ServerEvent::InputAudioBufferSpeechStarted`]
/// (wire type `input_audio_buffer.speech_started`).
///
/// All fields are required. Note that `item_id` is mandatory here, whereas
/// [`SpeechStoppedEvent`] treats it as optional.
#[derive(Debug, Clone, Deserialize)]
pub struct SpeechStartedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of `audio_start_ms` — presumably milliseconds, going by the suffix;
/// confirm against the API reference.
pub audio_start_ms: u32,
/// Value of the event's `item_id` field.
pub item_id: String,
}
/// Payload of [`ServerEvent::InputAudioBufferSpeechStopped`]
/// (wire type `input_audio_buffer.speech_stopped`).
#[derive(Debug, Clone, Deserialize)]
pub struct SpeechStoppedEvent {
/// Value of the event's `event_id` field (required).
pub event_id: String,
/// Value of `audio_end_ms` (required) — presumably milliseconds, going by
/// the suffix; confirm against the API reference.
pub audio_end_ms: u32,
/// Value of `item_id`; `None` when the key is absent or `null`.
#[serde(default)]
pub item_id: Option<String>,
}
/// Payload shared by [`ServerEvent::OutputAudioBufferStarted`]
/// (`output_audio_buffer.started`) and [`ServerEvent::OutputAudioBufferCleared`]
/// (`output_audio_buffer.cleared`).
///
/// Both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct OutputAudioBufferEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
}
/// Payload of [`ServerEvent::OutputAudioBufferStopped`]
/// (wire type `output_audio_buffer.stopped`).
///
/// All four fields are required, so an event that omits `audio_end_ms` or
/// `item_id` fails to deserialize.
// NOTE(review): verify that the server always sends `audio_end_ms` and `item_id`
// on this event; if it does not, these fields would need to become optional.
#[derive(Debug, Clone, Deserialize)]
pub struct OutputAudioBufferStoppedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of `audio_end_ms` — presumably milliseconds, going by the suffix.
pub audio_end_ms: u32,
/// Value of the event's `item_id` field.
pub item_id: String,
}
/// Payload of [`ServerEvent::ResponseCreated`] (wire type `response.created`).
///
/// Both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseCreatedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Response description carried by the event.
pub response: ResponseInfo,
}
/// Payload of [`ServerEvent::ResponseDone`] (wire type `response.done`).
///
/// Has the same shape as [`ResponseCreatedEvent`]; both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseDoneEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Response description carried by the event.
pub response: ResponseInfo,
}
/// Response description embedded in [`ResponseCreatedEvent`] and [`ResponseDoneEvent`].
///
/// `id`, `object` and `status` are required; the remaining fields fall back to
/// `None` / an empty vector when absent.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseInfo {
/// Response identifier as sent by the server.
pub id: String,
/// Value of the `object` field (kept as a free-form string).
pub object: String,
/// Status of the response.
pub status: ResponseStatus,
/// Raw `status_details` JSON, left untyped; `None` when absent or `null`.
#[serde(default)]
pub status_details: Option<serde_json::Value>,
/// Items listed under `output`; empty when the key is absent.
#[serde(default)]
pub output: Vec<ResponseItem>,
/// Token usage, if reported.
#[serde(default)]
pub usage: Option<RealtimeUsage>,
}
/// Status value used by [`ResponseInfo::status`].
///
/// Variants are (de)serialized in `snake_case`, i.e. `in_progress`, `completed`,
/// `cancelled`, `incomplete` and `failed`. Any other string is a
/// deserialization error. Unlike the event payloads, this type also derives
/// `Serialize`, `Copy`, `PartialEq` and `Eq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResponseStatus {
/// Wire value `in_progress`.
InProgress,
/// Wire value `completed`.
Completed,
/// Wire value `cancelled`.
Cancelled,
/// Wire value `incomplete`.
Incomplete,
/// Wire value `failed`.
Failed,
}
/// A conversation/response item as it appears in server events.
///
/// Used by [`ConversationItemCreatedEvent`], [`ConversationItemRetrievedEvent`],
/// [`ResponseOutputItemEvent`] and [`ResponseInfo::output`]. `id`, `object` and
/// `type` are required; every other field is optional and defaults to
/// `None` / an empty vector when absent.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseItem {
/// Item identifier as sent by the server.
pub id: String,
/// Value of the `object` field (kept as a free-form string).
pub object: String,
/// Value of the JSON `type` field, kept as a free-form string rather than an enum.
#[serde(rename = "type")]
pub item_type: String,
/// Value of `role`, if present (free-form string).
#[serde(default)]
pub role: Option<String>,
/// Content parts of the item; empty when the key is absent.
#[serde(default)]
pub content: Vec<ResponseContentPart>,
/// Item status, if present.
#[serde(default)]
pub status: Option<ItemStatus>,
// The four fields below are presumably only populated for function-call
// related items — confirm against the API reference.
/// Value of `call_id`, if present.
#[serde(default)]
pub call_id: Option<String>,
/// Value of `name`, if present.
#[serde(default)]
pub name: Option<String>,
/// Value of `arguments`, if present; stored as the raw string, not parsed.
#[serde(default)]
pub arguments: Option<String>,
/// Value of `output`, if present.
#[serde(default)]
pub output: Option<String>,
}
/// One content part of a [`ResponseItem`], also carried by [`ResponseContentPartEvent`].
///
/// Only `type` is required; the payload fields are each optional.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseContentPart {
/// Value of the JSON `type` field, kept as a free-form string.
#[serde(rename = "type")]
pub content_type: String,
/// Value of `text`, if present.
#[serde(default)]
pub text: Option<String>,
/// Value of `audio`, if present; stored as the raw string
/// (presumably base64-encoded audio — confirm against the API reference).
#[serde(default)]
pub audio: Option<String>,
/// Value of `transcript`, if present.
#[serde(default)]
pub transcript: Option<String>,
}
/// Payload shared by [`ServerEvent::ResponseOutputItemAdded`]
/// (`response.output_item.added`) and [`ServerEvent::ResponseOutputItemDone`]
/// (`response.output_item.done`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseOutputItemEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// The item carried by the event.
pub item: ResponseItem,
}
/// Payload shared by [`ServerEvent::ResponseContentPartAdded`]
/// (`response.content_part.added`) and [`ServerEvent::ResponseContentPartDone`]
/// (`response.content_part.done`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseContentPartEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
/// The content part carried by the event.
pub part: ResponseContentPart,
}
/// Payload of [`ServerEvent::ResponseTextDelta`] (wire type `response.text.delta`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseTextDeltaEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
/// Value of the `delta` field (presumably an incremental text fragment).
pub delta: String,
}
/// Payload of [`ServerEvent::ResponseTextDone`] (wire type `response.text.done`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseTextDoneEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
/// Value of the `text` field (presumably the complete text — confirm).
pub text: String,
}
/// Payload of [`ServerEvent::ResponseAudioDelta`] (wire type `response.audio.delta`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseAudioDeltaEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
/// Value of the `delta` field, stored as the raw string and not decoded here
/// (presumably base64-encoded audio — confirm against the API reference).
pub delta: String,
}
/// Payload of [`ServerEvent::ResponseAudioDone`] (wire type `response.audio.done`).
///
/// Carries identifiers and indices only; all fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseAudioDoneEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
}
/// Payload of [`ServerEvent::ResponseAudioTranscriptDelta`]
/// (wire type `response.audio_transcript.delta`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseAudioTranscriptDeltaEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
/// Value of the `delta` field (presumably an incremental transcript fragment).
pub delta: String,
}
/// Payload of [`ServerEvent::ResponseAudioTranscriptDone`]
/// (wire type `response.audio_transcript.done`).
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseAudioTranscriptDoneEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// Value of `content_index`; must fit in a `u32`.
pub content_index: u32,
/// Value of the `transcript` field (presumably the complete transcript — confirm).
pub transcript: String,
}
/// Payload of [`ServerEvent::ResponseFunctionCallArgumentsDelta`]
/// (wire type `response.function_call_arguments.delta`).
///
/// All fields are required. Unlike the text/audio delta payloads, this one has
/// a `call_id` and no `content_index`.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseFunctionCallArgumentsDeltaEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// Value of the event's `call_id` field.
pub call_id: String,
/// Value of the `delta` field, stored as the raw string
/// (presumably a fragment of the arguments text — confirm).
pub delta: String,
}
/// Payload of [`ServerEvent::ResponseFunctionCallArgumentsDone`]
/// (wire type `response.function_call_arguments.done`).
///
/// All fields, including `name`, are required; an event without one of them
/// fails to deserialize.
#[derive(Debug, Clone, Deserialize)]
pub struct ResponseFunctionCallArgumentsDoneEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Value of the event's `response_id` field.
pub response_id: String,
/// Value of the event's `item_id` field.
pub item_id: String,
/// Value of `output_index`; must fit in a `u32`.
pub output_index: u32,
/// Value of the event's `call_id` field.
pub call_id: String,
/// Value of the event's `name` field.
pub name: String,
/// Value of the `arguments` field, stored as the raw string and not parsed here.
pub arguments: String,
}
/// Payload of [`ServerEvent::RateLimitsUpdated`] (wire type `rate_limits.updated`).
///
/// Both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct RateLimitsUpdatedEvent {
/// Value of the event's `event_id` field.
pub event_id: String,
/// Entries listed under `rate_limits`.
pub rate_limits: Vec<RateLimit>,
}
/// One entry of [`RateLimitsUpdatedEvent::rate_limits`].
///
/// All fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct RateLimit {
/// Value of the entry's `name` field.
pub name: String,
/// Value of `limit`; must fit in a `u32`.
pub limit: u32,
/// Value of `remaining`; must fit in a `u32`.
pub remaining: u32,
/// Value of `reset_seconds` — presumably the time until reset, in seconds;
/// confirm against the API reference.
pub reset_seconds: f32,
}
/// Payload of [`ServerEvent::Error`] (wire type `error`).
///
/// Both fields are required.
#[derive(Debug, Clone, Deserialize)]
pub struct ErrorEvent {
/// Value of the event's own `event_id` field (distinct from
/// [`RealtimeError::event_id`], which lives inside `error`).
pub event_id: String,
/// Error details.
pub error: RealtimeError,
}
/// Error details carried by [`ErrorEvent`] and [`InputAudioTranscriptionFailedEvent`].
///
/// Only `message` is required; every other field is `None` when absent or `null`.
#[derive(Debug, Clone, Deserialize)]
pub struct RealtimeError {
/// Value of the JSON `type` field, if present.
#[serde(rename = "type")]
pub error_type: Option<String>,
/// Value of `code`, if present.
pub code: Option<String>,
/// Error message text.
pub message: String,
/// Value of `param`, if present.
#[serde(default)]
pub param: Option<String>,
/// Value of the nested `event_id`, if present
/// (presumably the client event that triggered the error — confirm).
#[serde(default)]
pub event_id: Option<String>,
}
/// Token usage reported in [`ResponseInfo::usage`].
///
/// When the usage object is present, the three totals are required; the two
/// detail breakdowns are optional. `Default` is derived, giving all-zero
/// counts with no details.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct RealtimeUsage {
/// Value of `total_tokens`.
pub total_tokens: u32,
/// Value of `input_tokens`.
pub input_tokens: u32,
/// Value of `output_tokens`.
pub output_tokens: u32,
/// Input-token breakdown, if present.
#[serde(default)]
pub input_token_details: Option<InputTokenDetails>,
/// Output-token breakdown, if present.
#[serde(default)]
pub output_token_details: Option<OutputTokenDetails>,
}
/// Input-token breakdown used by [`RealtimeUsage::input_token_details`].
///
/// Every counter is optional on the wire and reads as `0` when its key is absent.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct InputTokenDetails {
/// Value of `cached_tokens`; `0` when absent.
#[serde(default)]
pub cached_tokens: u32,
/// Value of `text_tokens`; `0` when absent.
#[serde(default)]
pub text_tokens: u32,
/// Value of `audio_tokens`; `0` when absent.
#[serde(default)]
pub audio_tokens: u32,
}
/// Output-token breakdown used by [`RealtimeUsage::output_token_details`].
///
/// Every counter is optional on the wire and reads as `0` when its key is absent.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct OutputTokenDetails {
/// Value of `text_tokens`; `0` when absent.
#[serde(default)]
pub text_tokens: u32,
/// Value of `audio_tokens`; `0` when absent.
#[serde(default)]
pub audio_tokens: u32,
}
impl ServerEvent {
pub fn is_error(&self) -> bool {
matches!(self, Self::Error(_))
}
pub fn event_id(&self) -> Option<&str> {
match self {
Self::SessionCreated(e) => Some(&e.event_id),
Self::SessionUpdated(e) => Some(&e.event_id),
Self::ConversationCreated(e) => Some(&e.event_id),
Self::ConversationItemCreated(e) => Some(&e.event_id),
Self::ConversationItemRetrieved(e) => Some(&e.event_id),
Self::ConversationItemDeleted(e) => Some(&e.event_id),
Self::ConversationItemTruncated(e) => Some(&e.event_id),
Self::InputAudioTranscriptionCompleted(e) => Some(&e.event_id),
Self::InputAudioTranscriptionFailed(e) => Some(&e.event_id),
Self::InputAudioBufferCommitted(e) => Some(&e.event_id),
Self::InputAudioBufferCleared(e) => Some(&e.event_id),
Self::InputAudioBufferSpeechStarted(e) => Some(&e.event_id),
Self::InputAudioBufferSpeechStopped(e) => Some(&e.event_id),
Self::OutputAudioBufferStarted(e) => Some(&e.event_id),
Self::OutputAudioBufferStopped(e) => Some(&e.event_id),
Self::OutputAudioBufferCleared(e) => Some(&e.event_id),
Self::ResponseCreated(e) => Some(&e.event_id),
Self::ResponseDone(e) => Some(&e.event_id),
Self::ResponseOutputItemAdded(e) => Some(&e.event_id),
Self::ResponseOutputItemDone(e) => Some(&e.event_id),
Self::ResponseContentPartAdded(e) => Some(&e.event_id),
Self::ResponseContentPartDone(e) => Some(&e.event_id),
Self::ResponseTextDelta(e) => Some(&e.event_id),
Self::ResponseTextDone(e) => Some(&e.event_id),
Self::ResponseAudioDelta(e) => Some(&e.event_id),
Self::ResponseAudioDone(e) => Some(&e.event_id),
Self::ResponseAudioTranscriptDelta(e) => Some(&e.event_id),
Self::ResponseAudioTranscriptDone(e) => Some(&e.event_id),
Self::ResponseFunctionCallArgumentsDelta(e) => Some(&e.event_id),
Self::ResponseFunctionCallArgumentsDone(e) => Some(&e.event_id),
Self::RateLimitsUpdated(e) => Some(&e.event_id),
Self::Error(e) => Some(&e.event_id),
}
}
}