outfox_openai/spec/realtime/
server_event.rs

use serde::{Deserialize, Serialize};

use super::{
    content_part::ContentPart, conversation::Conversation, error::RealtimeAPIError, item::Item,
    rate_limit::RateLimit, response_resource::ResponseResource, session_resource::SessionResource,
};
7
8#[derive(Debug, Serialize, Deserialize, Clone)]
9pub struct ErrorEvent {
10    /// The unique ID of the server event.
11    pub event_id: String,
12    /// Details of the error.
13    pub error: RealtimeAPIError,
14}
15
16#[derive(Debug, Serialize, Deserialize, Clone)]
17pub struct SessionCreatedEvent {
18    /// The unique ID of the server event.
19    pub event_id: String,
20    /// The session resource.
21    pub session: SessionResource,
22}
23
24#[derive(Debug, Serialize, Deserialize, Clone)]
25pub struct SessionUpdatedEvent {
26    /// The unique ID of the server event.
27    pub event_id: String,
28    /// The updated session resource.
29    pub session: SessionResource,
30}
31
32#[derive(Debug, Serialize, Deserialize, Clone)]
33pub struct ConversationCreatedEvent {
34    /// The unique ID of the server event.
35    pub event_id: String,
36    /// The conversation resource.
37    pub conversation: Conversation,
38}
39
40#[derive(Debug, Serialize, Deserialize, Clone)]
41pub struct InputAudioBufferCommittedEvent {
42    /// The unique ID of the server event.
43    pub event_id: String,
44    /// The ID of the preceding item after which the new item will be inserted.
45    pub previous_item_id: String,
46    /// The ID of the user message item that will be created.
47    pub item_id: String,
48}
49
50#[derive(Debug, Serialize, Deserialize, Clone)]
51pub struct InputAudioBufferClearedEvent {
52    /// The unique ID of the server event.
53    pub event_id: String,
54}
55
56#[derive(Debug, Serialize, Deserialize, Clone)]
57pub struct InputAudioBufferSpeechStartedEvent {
58    /// The unique ID of the server event.
59    pub event_id: String,
60    /// Milliseconds since the session started when speech was detected.
61    pub audio_start_ms: u32,
62    /// The ID of the user message item that will be created when speech stops.
63    pub item_id: String,
64}
65
66#[derive(Debug, Serialize, Deserialize, Clone)]
67pub struct InputAudioBufferSpeechStoppedEvent {
68    /// The unique ID of the server event.
69    pub event_id: String,
70    /// Milliseconds since the session started when speech stopped.
71    pub audio_end_ms: u32,
72    /// The ID of the user message item that will be created.
73    pub item_id: String,
74}
75
76#[derive(Debug, Serialize, Deserialize, Clone)]
77pub struct ConversationItemCreatedEvent {
78    /// The unique ID of the server event.
79    pub event_id: String,
80    /// The ID of the preceding item.
81    pub previous_item_id: Option<String>,
82    /// The item that was created.
83    pub item: Item,
84}
85
86#[derive(Debug, Serialize, Deserialize, Clone)]
87/// Log probability information for a transcribed token.
88pub struct LogProb {
89    /// Raw UTF-8 bytes for the token.
90    pub bytes: Vec<u8>,
91    /// The log probability of the token.
92    pub logprob: f64,
93    /// The token string.
94    pub token: String,
95}
96
97#[derive(Debug, Serialize, Deserialize, Clone)]
98pub struct ConversationItemInputAudioTranscriptionCompletedEvent {
99    /// The unique ID of the server event.
100    pub event_id: String,
101    /// The ID of the user message item.
102    pub item_id: String,
103    /// The index of the content part containing the audio.
104    pub content_index: u32,
105    /// The transcribed text.
106    pub transcript: String,
107    /// Optional per-token log probability data.
108    pub logprobs: Option<Vec<LogProb>>,
109}
110
111#[derive(Debug, Serialize, Deserialize, Clone)]
112pub struct ConversationItemInputAudioTranscriptionDeltaEvent {
113    /// The unique ID of the server event.
114    pub event_id: String,
115    /// The ID of the user message item.
116    pub item_id: String,
117    /// The index of the content part containing the audio.
118    pub content_index: u32,
119    /// The text delta.
120    pub delta: String,
121    /// Optional per-token log probability data.
122    pub logprobs: Option<Vec<LogProb>>,
123}
124
125#[derive(Debug, Serialize, Deserialize, Clone)]
126pub struct ConversationItemInputAudioTranscriptionFailedEvent {
127    /// The unique ID of the server event.
128    pub event_id: String,
129    /// The ID of the user message item.
130    pub item_id: String,
131    /// The index of the content part containing the audio.
132    pub content_index: u32,
133    /// Details of the transcription error.
134    pub error: RealtimeAPIError,
135}
136
137#[derive(Debug, Serialize, Deserialize, Clone)]
138pub struct ConversationItemTruncatedEvent {
139    /// The unique ID of the server event.
140    pub event_id: String,
141    /// The ID of the assistant message item that was truncated.
142    pub item_id: String,
143    /// The index of the content part that was truncated.
144    pub content_index: u32,
145    /// The duration up to which the audio was truncated, in milliseconds.
146    pub audio_end_ms: u32,
147}
148
149#[derive(Debug, Serialize, Deserialize, Clone)]
150pub struct ConversationItemDeletedEvent {
151    /// The unique ID of the server event.
152    pub event_id: String,
153    /// The ID of the item that was deleted.
154    pub item_id: String,
155}
156
157#[derive(Debug, Serialize, Deserialize, Clone)]
158pub struct ResponseCreatedEvent {
159    /// The unique ID of the server event.
160    pub event_id: String,
161    /// The response resource.
162    pub response: ResponseResource,
163}
164
165#[derive(Debug, Serialize, Deserialize, Clone)]
166pub struct ResponseDoneEvent {
167    /// The unique ID of the server event.
168    pub event_id: String,
169    /// The response resource.
170    pub response: ResponseResource,
171}
172
173#[derive(Debug, Serialize, Deserialize, Clone)]
174pub struct ResponseOutputItemAddedEvent {
175    /// The unique ID of the server event.
176    pub event_id: String,
177    /// The ID of the response to which the item belongs.
178    pub response_id: String,
179    /// The index of the output item in the response.
180    pub output_index: u32,
181    /// The item that was added.
182    pub item: Item,
183}
184
185#[derive(Debug, Serialize, Deserialize, Clone)]
186pub struct ResponseOutputItemDoneEvent {
187    /// The unique ID of the server event.
188    pub event_id: String,
189    /// The ID of the response to which the item belongs.
190    pub response_id: String,
191    /// The index of the output item in the response.
192    pub output_index: u32,
193    /// The completed item.
194    pub item: Item,
195}
196
197#[derive(Debug, Serialize, Deserialize, Clone)]
198pub struct ResponseContentPartAddedEvent {
199    /// The unique ID of the server event.
200    pub event_id: String,
201    /// The ID of the response.
202    pub response_id: String,
203    /// The ID of the item to which the content part was added.
204    pub item_id: String,
205    /// The index of the output item in the response.
206    pub output_index: u32,
207    /// The index of the content part in the item's content array.
208    pub content_index: u32,
209    /// The content part that was added.
210    pub part: ContentPart,
211}
212
213#[derive(Debug, Serialize, Deserialize, Clone)]
214pub struct ResponseContentPartDoneEvent {
215    /// The unique ID of the server event.
216    pub event_id: String,
217    /// The ID of the response.
218    pub response_id: String,
219    /// The ID of the item to which the content part was added.
220    pub item_id: String,
221    /// The index of the output item in the response.
222    pub output_index: u32,
223    /// The index of the content part in the item's content array.
224    pub content_index: u32,
225    /// The content part that is done.
226    pub part: ContentPart,
227}
228
229#[derive(Debug, Serialize, Deserialize, Clone)]
230pub struct ResponseTextDeltaEvent {
231    /// The unique ID of the server event.
232    pub event_id: String,
233    /// The ID of the response.
234    pub response_id: String,
235    /// The ID of the item.
236    pub item_id: String,
237    /// The index of the output item in the response.
238    pub output_index: u32,
239    /// The index of the content part in the item's content array.
240    pub content_index: u32,
241    /// The text delta.
242    pub delta: String,
243}
244
245#[derive(Debug, Serialize, Deserialize, Clone)]
246pub struct ResponseTextDoneEvent {
247    /// The unique ID of the server event.
248    pub event_id: String,
249    /// The ID of the response.
250    pub response_id: String,
251    /// The ID of the item.
252    pub item_id: String,
253    /// The index of the output item in the response.
254    pub output_index: u32,
255    /// The index of the content part in the item's content array.
256    pub content_index: u32,
257    /// The final text content.
258    pub text: String,
259}
260
261#[derive(Debug, Serialize, Deserialize, Clone)]
262pub struct ResponseAudioTranscriptDeltaEvent {
263    /// The unique ID of the server event.
264    pub event_id: String,
265    /// The ID of the response.
266    pub response_id: String,
267    /// The ID of the item.
268    pub item_id: String,
269    /// The index of the output item in the response.
270    pub output_index: u32,
271    /// The index of the content part in the item's content array.
272    pub content_index: u32,
273    /// The text delta.
274    pub delta: String,
275}
276
277#[derive(Debug, Serialize, Deserialize, Clone)]
278pub struct ResponseAudioTranscriptDoneEvent {
279    /// The unique ID of the server event.
280    pub event_id: String,
281    /// The ID of the response.
282    pub response_id: String,
283    /// The ID of the item.
284    pub item_id: String,
285    /// The index of the output item in the response.
286    pub output_index: u32,
287    /// The index of the content part in the item's content array.
288    pub content_index: u32,
289    ///The final transcript of the audio.
290    pub transcript: String,
291}
292
293#[derive(Debug, Serialize, Deserialize, Clone)]
294pub struct ResponseAudioDeltaEvent {
295    /// The unique ID of the server event.
296    pub event_id: String,
297    /// The ID of the response.
298    pub response_id: String,
299    /// The ID of the item.
300    pub item_id: String,
301    /// The index of the output item in the response.
302    pub output_index: u32,
303    /// The index of the content part in the item's content array.
304    pub content_index: u32,
305    /// Base64-encoded audio data delta.
306    pub delta: String,
307}
308
309#[derive(Debug, Serialize, Deserialize, Clone)]
310pub struct ResponseAudioDoneEvent {
311    /// The unique ID of the server event.
312    pub event_id: String,
313    /// The ID of the response.
314    pub response_id: String,
315    /// The ID of the item.
316    pub item_id: String,
317    /// The index of the output item in the response.
318    pub output_index: u32,
319    /// The index of the content part in the item's content array.
320    pub content_index: u32,
321}
322
323#[derive(Debug, Serialize, Deserialize, Clone)]
324pub struct ResponseFunctionCallArgumentsDeltaEvent {
325    /// The unique ID of the server event.
326    pub event_id: String,
327    /// The ID of the response.
328    pub response_id: String,
329    /// The ID of the function call item.
330    pub item_id: String,
331    /// The index of the output item in the response.
332    pub output_index: u32,
333    /// The ID of the function call.
334    pub call_id: String,
335    /// The arguments delta as a JSON string.
336    pub delta: String,
337}
338
339#[derive(Debug, Serialize, Deserialize, Clone)]
340pub struct ResponseFunctionCallArgumentsDoneEvent {
341    /// The unique ID of the server event.
342    pub event_id: String,
343    /// The ID of the response.
344    pub response_id: String,
345    /// The ID of the function call item.
346    pub item_id: String,
347    /// The index of the output item in the response.
348    pub output_index: u32,
349    /// The ID of the function call.
350    pub call_id: String,
351    /// The final arguments as a JSON string.
352    pub arguments: String,
353}
354
355#[derive(Debug, Serialize, Deserialize, Clone)]
356pub struct RateLimitsUpdatedEvent {
357    /// The unique ID of the server event.
358    pub event_id: String,
359    pub rate_limits: Vec<RateLimit>,
360}
361
362/// These are events emitted from the OpenAI Realtime WebSocket server to the client.
363#[derive(Debug, Serialize, Deserialize, Clone)]
364#[serde(tag = "type")]
365pub enum ServerEvent {
366    /// Returned when an error occurs.
367    #[serde(rename = "error")]
368    Error(ErrorEvent),
369
370    /// Returned when a session is created. Emitted automatically when a new connection is established.
371    #[serde(rename = "session.created")]
372    SessionCreated(SessionCreatedEvent),
373
374    /// Returned when a session is updated.
375    #[serde(rename = "session.updated")]
376    SessionUpdated(SessionUpdatedEvent),
377
378    /// Returned when a conversation is created. Emitted right after session creation.
379    #[serde(rename = "conversation.created")]
380    ConversationCreated(ConversationCreatedEvent),
381
382    /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode.
383    #[serde(rename = "input_audio_buffer.committed")]
384    InputAudioBufferCommitted(InputAudioBufferCommittedEvent),
385
386    /// Returned when the input audio buffer is cleared by the client.
387    #[serde(rename = "input_audio_buffer.cleared")]
388    InputAudioBufferCleared(InputAudioBufferClearedEvent),
389
390    /// Returned in server turn detection mode when speech is detected.
391    #[serde(rename = "input_audio_buffer.speech_started")]
392    InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent),
393
394    /// Returned in server turn detection mode when speech stops.
395    #[serde(rename = "input_audio_buffer.speech_stopped")]
396    InputAudioBufferSpeechStopped(InputAudioBufferSpeechStoppedEvent),
397
398    /// Returned when a conversation item is created.
399    #[serde(rename = "conversation.item.created")]
400    ConversationItemCreated(ConversationItemCreatedEvent),
401
402    /// Returned when input audio transcription is enabled and a transcription succeeds.
403    #[serde(rename = "conversation.item.input_audio_transcription.completed")]
404    ConversationItemInputAudioTranscriptionCompleted(
405        ConversationItemInputAudioTranscriptionCompletedEvent,
406    ),
407
408    #[serde(rename = "conversation.item.input_audio_transcription.delta")]
409    ConversationItemInputAudioTranscriptionDelta(ConversationItemInputAudioTranscriptionDeltaEvent),
410
411    /// Returned when input audio transcription is configured, and a transcription request for a user message failed.
412    #[serde(rename = "conversation.item.input_audio_transcription.failed")]
413    ConversationItemInputAudioTranscriptionFailed(
414        ConversationItemInputAudioTranscriptionFailedEvent,
415    ),
416
417    /// Returned when an earlier assistant audio message item is truncated by the client.
418    #[serde(rename = "conversation.item.truncated")]
419    ConversationItemTruncated(ConversationItemTruncatedEvent),
420
421    /// Returned when an item in the conversation is deleted.
422    #[serde(rename = "conversation.item.deleted")]
423    ConversationItemDeleted(ConversationItemDeletedEvent),
424
425    /// Returned when a new Response is created. The first event of response creation, where the response is in an initial state of "in_progress".
426    #[serde(rename = "response.created")]
427    ResponseCreated(ResponseCreatedEvent),
428
429    /// Returned when a Response is done streaming. Always emitted, no matter the final state.
430    #[serde(rename = "response.done")]
431    ResponseDone(ResponseDoneEvent),
432
433    /// Returned when a new Item is created during response generation.
434    #[serde(rename = "response.output_item.added")]
435    ResponseOutputItemAdded(ResponseOutputItemAddedEvent),
436
437    /// Returned when an Item is done streaming. Also emitted when a Response is interrupted, incomplete, or cancelled.
438    #[serde(rename = "response.output_item.done")]
439    ResponseOutputItemDone(ResponseOutputItemDoneEvent),
440
441    /// Returned when a new content part is added to an assistant message item during response generation.
442    #[serde(rename = "response.content_part.added")]
443    ResponseContentPartAdded(ResponseContentPartAddedEvent),
444
445    /// Returned when a content part is done streaming in an assistant message item.
446    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
447    #[serde(rename = "response.content_part.done")]
448    ResponseContentPartDone(ResponseContentPartDoneEvent),
449
450    /// Returned when the text value of a "text" content part is updated.
451    #[serde(rename = "response.text.delta")]
452    ResponseTextDelta(ResponseTextDeltaEvent),
453
454    /// Returned when the text value of a "text" content part is done streaming.
455    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
456    #[serde(rename = "response.text.done")]
457    ResponseTextDone(ResponseTextDoneEvent),
458
459    /// Returned when the model-generated transcription of audio output is updated.
460    #[serde(rename = "response.audio_transcript.delta")]
461    ResponseAudioTranscriptDelta(ResponseAudioTranscriptDeltaEvent),
462
463    /// Returned when the model-generated transcription of audio output is done streaming.
464    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
465    #[serde(rename = "response.audio_transcript.done")]
466    ResponseAudioTranscriptDone(ResponseAudioTranscriptDoneEvent),
467
468    /// Returned when the model-generated audio is updated.
469    #[serde(rename = "response.audio.delta")]
470    ResponseAudioDelta(ResponseAudioDeltaEvent),
471
472    /// Returned when the model-generated audio is done.
473    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
474    #[serde(rename = "response.audio.done")]
475    ResponseAudioDone(ResponseAudioDoneEvent),
476
477    /// Returned when the model-generated function call arguments are updated.
478    #[serde(rename = "response.function_call_arguments.delta")]
479    ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent),
480
481    /// Returned when the model-generated function call arguments are done streaming.
482    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
483    #[serde(rename = "response.function_call_arguments.done")]
484    ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent),
485
486    /// Emitted after every "response.done" event to indicate the updated rate limits.
487    #[serde(rename = "rate_limits.updated")]
488    RateLimitsUpdated(RateLimitsUpdatedEvent),
489}