// async_openai_wasm/types/realtime/client_event.rs

1use serde::{Deserialize, Serialize};
2
3use crate::types::realtime::{RealtimeConversationItem, RealtimeResponseCreateParams, Session};
4
5///
6/// A trait same as Into<String>
7///
8/// For converting event structs into text part of WS message
9///
10pub trait ToText {
11    fn to_text(self) -> String;
12}
13
/// Payload of the `session.update` client event: updates the session's configuration.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeClientEventSessionUpdate {
    /// Optional client-generated ID used to identify this event.
    /// This is an arbitrary string that a client may assign. It will be passed
    /// back if there is an error with the event, but the corresponding
    /// `session.updated` event will not include it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
    /// Update the Realtime session. Choose either a realtime session or a transcription session.
    pub session: Session,
}
25
/// Payload of the `input_audio_buffer.append` client event: appends audio bytes
/// to the input audio buffer.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventInputAudioBufferAppend {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
    /// Base64-encoded audio bytes. This must be in the format specified by
    /// the `input_audio_format` field in the session configuration.
    pub audio: String,
}
35
/// Payload of the `input_audio_buffer.commit` client event: commits the buffered
/// user audio as a new user message item.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventInputAudioBufferCommit {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
}
42
/// Payload of the `input_audio_buffer.clear` client event: clears the audio bytes
/// in the input buffer.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventInputAudioBufferClear {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
}
49
/// Payload of the `conversation.item.create` client event: adds a new item to the
/// conversation's context.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeClientEventConversationItemCreate {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// The ID of the preceding item after which the new item will be inserted.
    /// If not set, the new item will be appended to the end of the conversation.
    /// If set to `root`, the new item will be added to the beginning of the conversation.
    /// If set to an existing ID, it allows an item to be inserted mid-conversation.
    /// If the ID cannot be found, an error will be returned and the item will not be added.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub previous_item_id: Option<String>,

    /// A single item within a Realtime conversation.
    pub item: RealtimeConversationItem,
}
67
/// Payload of the `conversation.item.retrieve` client event: asks the server for
/// its representation of a specific conversation item.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventConversationItemRetrieve {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// The ID of the item to retrieve.
    pub item_id: String,
}
77
/// Payload of the `conversation.item.truncate` client event: truncates a previous
/// assistant message's audio.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventConversationItemTruncate {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// The ID of the assistant message item to truncate. Only assistant message items can be truncated.
    pub item_id: String,

    /// The index of the content part to truncate. Set this to `0`.
    pub content_index: u32,

    /// Inclusive duration up to which audio is truncated, in milliseconds.
    /// If the audio_end_ms is greater than the actual audio duration, the server will respond with an error.
    pub audio_end_ms: u32,
}
94
/// Payload of the `conversation.item.delete` client event: removes an item from
/// the conversation history.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventConversationItemDelete {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// The ID of the item to delete.
    pub item_id: String,
}
104
105#[derive(Debug, Serialize, Deserialize, Clone, Default)]
106pub struct RealtimeClientEventResponseCreate {
107    /// Optional client-generated ID used to identify this event.
108    #[serde(skip_serializing_if = "Option::is_none")]
109    pub event_id: Option<String>,
110
111    /// Create a new Realtime response with these parameters
112    pub response: Option<RealtimeResponseCreateParams>,
113}
114
/// Payload of the `response.cancel` client event: cancels an in-progress response.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventResponseCancel {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// A specific response ID to cancel - if not provided, will cancel an
    /// in-progress response in the default conversation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_id: Option<String>,
}
126
/// Payload of the `output_audio_buffer.clear` client event (WebRTC only): cuts off
/// the current audio response.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventOutputAudioBufferClear {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
}
133
134/// These are events that the OpenAI Realtime WebSocket server will accept from the client.
135#[derive(Debug, Serialize, Deserialize)]
136#[serde(tag = "type")]
137pub enum RealtimeClientEvent {
138    /// Send this event to update the session's configuration. The client may send this event at any time to update any field
139    /// except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.
140    ///
141    /// When the server receives a `session.update`, it will respond with a `session.updated` event showing the full, effective
142    /// configuration. Only the fields that are present in the `session.update` are updated. To clear a field like `instructions`,
143    /// pass an empty string. To clear a field like `tools`, pass an empty array. To clear a field like `turn_detection`, pass `null`.
144    #[serde(rename = "session.update")]
145    SessionUpdate(RealtimeClientEventSessionUpdate),
146
147    /// Send this event to append audio bytes to the input audio buffer. The audio buffer is temporary storage you can write to and later commit.
148    /// A "commit" will create a new user message item in the conversation history from the buffer content and clear the buffer. Input audio
149    /// transcription (if enabled) will be generated when the buffer is committed.
150    ///
151    /// If VAD is enabled the audio buffer is used to detect speech and the server will decide when to commit. When Server VAD is disabled,
152    /// you must commit the audio buffer manually. Input audio noise reduction operates on writes to the audio buffer.
153    ///
154    /// The client may choose how much audio to place in each event up to a maximum of 15 MiB, for example streaming smaller chunks from the
155    /// client may allow the VAD to be more responsive. Unlike most other client events, the server will not send a confirmation response to
156    /// this event.
157    #[serde(rename = "input_audio_buffer.append")]
158    InputAudioBufferAppend(RealtimeClientEventInputAudioBufferAppend),
159
160    /// Send this event to commit the user input audio buffer, which will create a new user message item in the conversation.
161    /// This event will produce an error if the input audio buffer is empty.
162    /// When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.
163    /// Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model.
164    /// The server will respond with an input_audio_buffer.committed event.
165    #[serde(rename = "input_audio_buffer.commit")]
166    InputAudioBufferCommit(RealtimeClientEventInputAudioBufferCommit),
167
168    /// Send this event to clear the audio bytes in the buffer.
169    /// The server will respond with an `input_audio_buffer.cleared` event.
170    #[serde(rename = "input_audio_buffer.clear")]
171    InputAudioBufferClear(RealtimeClientEventInputAudioBufferClear),
172
173    /// Add a new Item to the Conversation's context, including messages, function calls, and function call responses.
174    /// This event can be used both to populate a "history" of the conversation and to add new items mid-stream,
175    /// but has the current limitation that it cannot populate assistant audio messages.
176    ///
177    /// If successful, the server will respond with a `conversation.item.created` event, otherwise an `error` event will be sent.
178    #[serde(rename = "conversation.item.create")]
179    ConversationItemCreate(RealtimeClientEventConversationItemCreate),
180
181    /// Send this event when you want to retrieve the server's representation of a specific item in the conversation history.
182    /// This is useful, for example, to inspect user audio after noise cancellation and VAD.
183    /// The server will respond with a `conversation.item.retrieved` event, unless the item does not exist in the conversation history,
184    /// in which case the server will respond with an error.
185    #[serde(rename = "conversation.item.retrieve")]
186    ConversationItemRetrieve(RealtimeClientEventConversationItemRetrieve),
187
188    /// Send this event to truncate a previous assistant message's audio. The server will produce audio faster than realtime,
189    /// so this event is useful when the user interrupts to truncate audio that has already been sent to the client but not
190    /// yet played. This will synchronize the server's understanding of the audio with the client's playback.
191    ///
192    /// Truncating audio will delete the server-side text transcript to ensure there is not text in the context that hasn't
193    /// been heard by the user.
194    ///
195    /// If successful, the server will respond with a `conversation.item.truncated` event.
196    #[serde(rename = "conversation.item.truncate")]
197    ConversationItemTruncate(RealtimeClientEventConversationItemTruncate),
198
199    /// Send this event when you want to remove any item from the conversation history. The server will respond with a
200    /// `conversation.item.deleted` event, unless the item does not exist in the conversation history, in which case the
201    /// server will respond with an error.
202    #[serde(rename = "conversation.item.delete")]
203    ConversationItemDelete(RealtimeClientEventConversationItemDelete),
204
205    /// This event instructs the server to create a Response, which means triggering model inference.
206    /// When in Server VAD mode, the server will create Responses automatically.
207    ///
208    /// A Response will include at least one Item, and may have two, in which case the second will be a function call.
209    /// These Items will be appended to the conversation history by default.
210    ///
211    /// The server will respond with a `response.created` event, events for Items and content created, and finally a
212    /// `response.done` event to indicate the Response is complete.
213    ///
214    /// The `response.create` event includes inference configuration like `instructions` and `tools`. If these are set, they will
215    /// override the Session's configuration for this Response only.
216    ///
217    /// Responses can be created out-of-band of the default Conversation, meaning that they can have arbitrary input, and
218    /// it's possible to disable writing the output to the Conversation. Only one Response can write to the default
219    /// Conversation at a time, but otherwise multiple Responses can be created in parallel. The `metadata` field is a good
220    /// way to disambiguate multiple simultaneous Responses.
221    ///
222    /// Clients can set `conversation` to `none` to create a Response that does not write to the default Conversation.
223    /// Arbitrary input can be provided with the `input` field, which is an array accepting raw Items and references to
224    /// existing Items.
225    #[serde(rename = "response.create")]
226    ResponseCreate(RealtimeClientEventResponseCreate),
227
228    /// Send this event to cancel an in-progress response. The server will respond with a `response.done` event
229    /// with a status of `response.status=cancelled`. If there is no response to cancel, the server will respond
230    /// with an error. It's safe to call `response.cancel` even if no response is in progress, an error will be
231    /// returned the session will remain unaffected.
232    #[serde(rename = "response.cancel")]
233    ResponseCancel(RealtimeClientEventResponseCancel),
234
235    /// **WebRTC Only:** Emit to cut off the current audio response.
236    /// This will trigger the server to stop generating audio and emit a `output_audio_buffer.cleared` event.
237    /// This event should be preceded by a `response.cancel` client event to stop the generation of the current response.
238    /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc)
239    #[serde(rename = "output_audio_buffer.clear")]
240    OutputAudioBufferClear(RealtimeClientEventOutputAudioBufferClear),
241}
242
243impl From<&RealtimeClientEvent> for String {
244    fn from(value: &RealtimeClientEvent) -> Self {
245        serde_json::to_string(value).unwrap()
246    }
247}
248
/// Generates `impl From<$from_typ> for $evt_typ` that wraps the value in the
/// given enum `$variant`, so payload structs convert ergonomically into the
/// client event enum.
macro_rules! event_from {
    ($from_typ:ty, $evt_typ:ty, $variant:ident) => {
        impl From<$from_typ> for $evt_typ {
            fn from(value: $from_typ) -> Self {
                <$evt_typ>::$variant(value)
            }
        }
    };
}
258
// `From<payload>` conversions into `RealtimeClientEvent`, one per variant.
event_from!(
    RealtimeClientEventSessionUpdate,
    RealtimeClientEvent,
    SessionUpdate
);
event_from!(
    RealtimeClientEventInputAudioBufferAppend,
    RealtimeClientEvent,
    InputAudioBufferAppend
);
event_from!(
    RealtimeClientEventInputAudioBufferCommit,
    RealtimeClientEvent,
    InputAudioBufferCommit
);
event_from!(
    RealtimeClientEventInputAudioBufferClear,
    RealtimeClientEvent,
    InputAudioBufferClear
);
event_from!(
    RealtimeClientEventConversationItemCreate,
    RealtimeClientEvent,
    ConversationItemCreate
);
event_from!(
    RealtimeClientEventConversationItemTruncate,
    RealtimeClientEvent,
    ConversationItemTruncate
);
event_from!(
    RealtimeClientEventConversationItemDelete,
    RealtimeClientEvent,
    ConversationItemDelete
);
event_from!(
    RealtimeClientEventConversationItemRetrieve,
    RealtimeClientEvent,
    ConversationItemRetrieve
);
event_from!(
    RealtimeClientEventResponseCreate,
    RealtimeClientEvent,
    ResponseCreate
);
event_from!(
    RealtimeClientEventResponseCancel,
    RealtimeClientEvent,
    ResponseCancel
);
event_from!(
    RealtimeClientEventOutputAudioBufferClear,
    RealtimeClientEvent,
    OutputAudioBufferClear
);
314
315impl From<RealtimeConversationItem> for RealtimeClientEventConversationItemCreate {
316    fn from(value: RealtimeConversationItem) -> Self {
317        Self {
318            event_id: None,
319            previous_item_id: None,
320            item: value,
321        }
322    }
323}
324
325impl<T: Into<RealtimeClientEvent>> ToText for T {
326    // blanket impl for all client event structs
327    fn to_text(self) -> String {
328        (&self.into()).into()
329    }
330}
331
// Implement EventType trait for all event types in this file.
// Maps each payload struct to its wire-format `type` string; only compiled
// with the `_api` feature, where `crate::traits::EventType` is available.
#[cfg(feature = "_api")]
macro_rules! impl_event_type {
    ($($ty:ty => $event_type:expr),* $(,)?) => {
        $(
            impl crate::traits::EventType for $ty {
                fn event_type(&self) -> &'static str {
                    $event_type
                }
            }
        )*
    };
}
345
// Each string must match the corresponding `#[serde(rename = "…")]` tag on
// `RealtimeClientEvent`.
#[cfg(feature = "_api")]
impl_event_type! {
    RealtimeClientEventSessionUpdate => "session.update",
    RealtimeClientEventInputAudioBufferAppend => "input_audio_buffer.append",
    RealtimeClientEventInputAudioBufferCommit => "input_audio_buffer.commit",
    RealtimeClientEventInputAudioBufferClear => "input_audio_buffer.clear",
    RealtimeClientEventConversationItemCreate => "conversation.item.create",
    RealtimeClientEventConversationItemRetrieve => "conversation.item.retrieve",
    RealtimeClientEventConversationItemTruncate => "conversation.item.truncate",
    RealtimeClientEventConversationItemDelete => "conversation.item.delete",
    RealtimeClientEventResponseCreate => "response.create",
    RealtimeClientEventResponseCancel => "response.cancel",
    RealtimeClientEventOutputAudioBufferClear => "output_audio_buffer.clear",
}
360
#[cfg(feature = "_api")]
impl crate::traits::EventType for RealtimeClientEvent {
    /// Delegates to the wrapped payload's `event_type`, so the enum reports the
    /// same `type` string as its inner event struct.
    fn event_type(&self) -> &'static str {
        match self {
            Self::SessionUpdate(e) => e.event_type(),
            Self::InputAudioBufferAppend(e) => e.event_type(),
            Self::InputAudioBufferCommit(e) => e.event_type(),
            Self::InputAudioBufferClear(e) => e.event_type(),
            Self::ConversationItemCreate(e) => e.event_type(),
            Self::ConversationItemRetrieve(e) => e.event_type(),
            Self::ConversationItemTruncate(e) => e.event_type(),
            Self::ConversationItemDelete(e) => e.event_type(),
            Self::ResponseCreate(e) => e.event_type(),
            Self::ResponseCancel(e) => e.event_type(),
            Self::OutputAudioBufferClear(e) => e.event_type(),
        }
    }
}