async_openai_wasm/types/realtime/client_event.rs
1use serde::{Deserialize, Serialize};
2
3use crate::types::realtime::{RealtimeConversationItem, RealtimeResponseCreateParams, Session};
4
///
/// A trait same as Into<String>
///
/// For converting event structs into text part of WS message
/// (blanket-implemented below for everything convertible into
/// `RealtimeClientEvent`).
///
pub trait ToText {
    /// Consumes the event and returns its serialized text form.
    fn to_text(self) -> String;
}
13
/// Client event `session.update`: update the session's configuration.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeClientEventSessionUpdate {
    /// Optional client-generated ID used to identify this event.
    /// This is an arbitrary string that a client may assign. It will be passed
    /// back if there is an error with the event, but the corresponding
    /// `session.updated` event will not include it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
    /// Update the Realtime session. Choose either a realtime session or a transcription session.
    pub session: Session,
}
25
/// Client event `input_audio_buffer.append`: append audio bytes to the input buffer.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventInputAudioBufferAppend {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
    /// Base64-encoded audio bytes. This must be in the format specified by
    /// the `input_audio_format` field in the session configuration.
    pub audio: String,
}
35
/// Client event `input_audio_buffer.commit`: commit the buffered user audio.
/// Carries no payload beyond the optional event ID.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventInputAudioBufferCommit {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
}
42
/// Client event `input_audio_buffer.clear`: clear the input audio buffer.
/// Carries no payload beyond the optional event ID.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventInputAudioBufferClear {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
}
49
/// Client event `conversation.item.create`: add an item to the conversation.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeClientEventConversationItemCreate {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// The ID of the preceding item after which the new item will be inserted.
    /// If not set, the new item will be appended to the end of the conversation.
    /// If set to `root`, the new item will be added to the beginning of the conversation.
    /// If set to an existing ID, it allows an item to be inserted mid-conversation.
    /// If the ID cannot be found, an error will be returned and the item will not be added.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub previous_item_id: Option<String>,

    /// A single item within a Realtime conversation.
    pub item: RealtimeConversationItem,
}
67
/// Client event `conversation.item.retrieve`: fetch the server's copy of an item.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventConversationItemRetrieve {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// The ID of the item to retrieve.
    pub item_id: String,
}
77
/// Client event `conversation.item.truncate`: truncate a previous assistant audio message.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventConversationItemTruncate {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// The ID of the assistant message item to truncate. Only assistant message items can be truncated.
    pub item_id: String,

    /// The index of the content part to truncate. Set this to `0`.
    pub content_index: u32,

    /// Inclusive duration up to which audio is truncated, in milliseconds.
    /// If the audio_end_ms is greater than the actual audio duration, the server will respond with an error.
    pub audio_end_ms: u32,
}
94
/// Client event `conversation.item.delete`: remove an item from the conversation.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventConversationItemDelete {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// The ID of the item to delete.
    pub item_id: String,
}
104
105#[derive(Debug, Serialize, Deserialize, Clone, Default)]
106pub struct RealtimeClientEventResponseCreate {
107 /// Optional client-generated ID used to identify this event.
108 #[serde(skip_serializing_if = "Option::is_none")]
109 pub event_id: Option<String>,
110
111 /// Create a new Realtime response with these parameters
112 pub response: Option<RealtimeResponseCreateParams>,
113}
114
/// Client event `response.cancel`: cancel an in-progress response.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventResponseCancel {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// A specific response ID to cancel - if not provided, will cancel an
    /// in-progress response in the default conversation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_id: Option<String>,
}
126
/// Client event `output_audio_buffer.clear` (WebRTC only): cut off the current
/// audio response. Carries no payload beyond the optional event ID.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventOutputAudioBufferClear {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
}
133
/// These are events that the OpenAI Realtime WebSocket server will accept from the client.
///
/// Serialized with an internal `"type"` tag whose value is each variant's
/// `#[serde(rename = "...")]` string (e.g. `"session.update"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RealtimeClientEvent {
    /// Send this event to update the session's configuration. The client may send this event at any time to update any field
    /// except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.
    ///
    /// When the server receives a `session.update`, it will respond with a `session.updated` event showing the full, effective
    /// configuration. Only the fields that are present in the `session.update` are updated. To clear a field like `instructions`,
    /// pass an empty string. To clear a field like `tools`, pass an empty array. To clear a field like `turn_detection`, pass `null`.
    #[serde(rename = "session.update")]
    SessionUpdate(RealtimeClientEventSessionUpdate),

    /// Send this event to append audio bytes to the input audio buffer. The audio buffer is temporary storage you can write to and later commit.
    /// A "commit" will create a new user message item in the conversation history from the buffer content and clear the buffer. Input audio
    /// transcription (if enabled) will be generated when the buffer is committed.
    ///
    /// If VAD is enabled the audio buffer is used to detect speech and the server will decide when to commit. When Server VAD is disabled,
    /// you must commit the audio buffer manually. Input audio noise reduction operates on writes to the audio buffer.
    ///
    /// The client may choose how much audio to place in each event up to a maximum of 15 MiB, for example streaming smaller chunks from the
    /// client may allow the VAD to be more responsive. Unlike most other client events, the server will not send a confirmation response to
    /// this event.
    #[serde(rename = "input_audio_buffer.append")]
    InputAudioBufferAppend(RealtimeClientEventInputAudioBufferAppend),

    /// Send this event to commit the user input audio buffer, which will create a new user message item in the conversation.
    /// This event will produce an error if the input audio buffer is empty.
    /// When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.
    /// Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model.
    /// The server will respond with an input_audio_buffer.committed event.
    #[serde(rename = "input_audio_buffer.commit")]
    InputAudioBufferCommit(RealtimeClientEventInputAudioBufferCommit),

    /// Send this event to clear the audio bytes in the buffer.
    /// The server will respond with an `input_audio_buffer.cleared` event.
    #[serde(rename = "input_audio_buffer.clear")]
    InputAudioBufferClear(RealtimeClientEventInputAudioBufferClear),

    /// Add a new Item to the Conversation's context, including messages, function calls, and function call responses.
    /// This event can be used both to populate a "history" of the conversation and to add new items mid-stream,
    /// but has the current limitation that it cannot populate assistant audio messages.
    ///
    /// If successful, the server will respond with a `conversation.item.created` event, otherwise an `error` event will be sent.
    #[serde(rename = "conversation.item.create")]
    ConversationItemCreate(RealtimeClientEventConversationItemCreate),

    /// Send this event when you want to retrieve the server's representation of a specific item in the conversation history.
    /// This is useful, for example, to inspect user audio after noise cancellation and VAD.
    /// The server will respond with a `conversation.item.retrieved` event, unless the item does not exist in the conversation history,
    /// in which case the server will respond with an error.
    #[serde(rename = "conversation.item.retrieve")]
    ConversationItemRetrieve(RealtimeClientEventConversationItemRetrieve),

    /// Send this event to truncate a previous assistant message's audio. The server will produce audio faster than realtime,
    /// so this event is useful when the user interrupts to truncate audio that has already been sent to the client but not
    /// yet played. This will synchronize the server's understanding of the audio with the client's playback.
    ///
    /// Truncating audio will delete the server-side text transcript to ensure there is not text in the context that hasn't
    /// been heard by the user.
    ///
    /// If successful, the server will respond with a `conversation.item.truncated` event.
    #[serde(rename = "conversation.item.truncate")]
    ConversationItemTruncate(RealtimeClientEventConversationItemTruncate),

    /// Send this event when you want to remove any item from the conversation history. The server will respond with a
    /// `conversation.item.deleted` event, unless the item does not exist in the conversation history, in which case the
    /// server will respond with an error.
    #[serde(rename = "conversation.item.delete")]
    ConversationItemDelete(RealtimeClientEventConversationItemDelete),

    /// This event instructs the server to create a Response, which means triggering model inference.
    /// When in Server VAD mode, the server will create Responses automatically.
    ///
    /// A Response will include at least one Item, and may have two, in which case the second will be a function call.
    /// These Items will be appended to the conversation history by default.
    ///
    /// The server will respond with a `response.created` event, events for Items and content created, and finally a
    /// `response.done` event to indicate the Response is complete.
    ///
    /// The `response.create` event includes inference configuration like `instructions` and `tools`. If these are set, they will
    /// override the Session's configuration for this Response only.
    ///
    /// Responses can be created out-of-band of the default Conversation, meaning that they can have arbitrary input, and
    /// it's possible to disable writing the output to the Conversation. Only one Response can write to the default
    /// Conversation at a time, but otherwise multiple Responses can be created in parallel. The `metadata` field is a good
    /// way to disambiguate multiple simultaneous Responses.
    ///
    /// Clients can set `conversation` to `none` to create a Response that does not write to the default Conversation.
    /// Arbitrary input can be provided with the `input` field, which is an array accepting raw Items and references to
    /// existing Items.
    #[serde(rename = "response.create")]
    ResponseCreate(RealtimeClientEventResponseCreate),

    /// Send this event to cancel an in-progress response. The server will respond with a `response.done` event
    /// with a status of `response.status=cancelled`. If there is no response to cancel, the server will respond
    /// with an error. It's safe to call `response.cancel` even if no response is in progress; an error will be
    /// returned and the session will remain unaffected.
    #[serde(rename = "response.cancel")]
    ResponseCancel(RealtimeClientEventResponseCancel),

    /// **WebRTC Only:** Emit to cut off the current audio response.
    /// This will trigger the server to stop generating audio and emit a `output_audio_buffer.cleared` event.
    /// This event should be preceded by a `response.cancel` client event to stop the generation of the current response.
    /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc)
    #[serde(rename = "output_audio_buffer.clear")]
    OutputAudioBufferClear(RealtimeClientEventOutputAudioBufferClear),
}
242
243impl From<&RealtimeClientEvent> for String {
244 fn from(value: &RealtimeClientEvent) -> Self {
245 serde_json::to_string(value).unwrap()
246 }
247}
248
/// Generates `impl From<$from_typ> for $evt_typ` that wraps the value in the
/// enum's `$variant`, so each concrete event struct converts into the event
/// enum without eleven hand-written impls.
macro_rules! event_from {
    ($from_typ:ty, $evt_typ:ty, $variant:ident) => {
        impl From<$from_typ> for $evt_typ {
            fn from(value: $from_typ) -> Self {
                <$evt_typ>::$variant(value)
            }
        }
    };
}
258
259event_from!(
260 RealtimeClientEventSessionUpdate,
261 RealtimeClientEvent,
262 SessionUpdate
263);
264event_from!(
265 RealtimeClientEventInputAudioBufferAppend,
266 RealtimeClientEvent,
267 InputAudioBufferAppend
268);
269event_from!(
270 RealtimeClientEventInputAudioBufferCommit,
271 RealtimeClientEvent,
272 InputAudioBufferCommit
273);
274event_from!(
275 RealtimeClientEventInputAudioBufferClear,
276 RealtimeClientEvent,
277 InputAudioBufferClear
278);
279event_from!(
280 RealtimeClientEventConversationItemCreate,
281 RealtimeClientEvent,
282 ConversationItemCreate
283);
284event_from!(
285 RealtimeClientEventConversationItemTruncate,
286 RealtimeClientEvent,
287 ConversationItemTruncate
288);
289event_from!(
290 RealtimeClientEventConversationItemDelete,
291 RealtimeClientEvent,
292 ConversationItemDelete
293);
294event_from!(
295 RealtimeClientEventConversationItemRetrieve,
296 RealtimeClientEvent,
297 ConversationItemRetrieve
298);
299event_from!(
300 RealtimeClientEventResponseCreate,
301 RealtimeClientEvent,
302 ResponseCreate
303);
304event_from!(
305 RealtimeClientEventResponseCancel,
306 RealtimeClientEvent,
307 ResponseCancel
308);
309event_from!(
310 RealtimeClientEventOutputAudioBufferClear,
311 RealtimeClientEvent,
312 OutputAudioBufferClear
313);
314
315impl From<RealtimeConversationItem> for RealtimeClientEventConversationItemCreate {
316 fn from(value: RealtimeConversationItem) -> Self {
317 Self {
318 event_id: None,
319 previous_item_id: None,
320 item: value,
321 }
322 }
323}
324
325impl<T: Into<RealtimeClientEvent>> ToText for T {
326 // blanket impl for all client event structs
327 fn to_text(self) -> String {
328 (&self.into()).into()
329 }
330}
331
// Implement EventType trait for all event types in this file
#[cfg(feature = "_api")]
macro_rules! impl_event_type {
    // Accepts `Type => "wire.type"` pairs, with optional trailing comma.
    ($($ty:ty => $event_type:expr),* $(,)?) => {
        $(
            impl crate::traits::EventType for $ty {
                // Each type reports its fixed wire `type` string.
                fn event_type(&self) -> &'static str {
                    $event_type
                }
            }
        )*
    };
}
345
#[cfg(feature = "_api")]
impl_event_type! {
    // These strings must stay in sync with the `#[serde(rename = "...")]`
    // tags on the `RealtimeClientEvent` variants above.
    RealtimeClientEventSessionUpdate => "session.update",
    RealtimeClientEventInputAudioBufferAppend => "input_audio_buffer.append",
    RealtimeClientEventInputAudioBufferCommit => "input_audio_buffer.commit",
    RealtimeClientEventInputAudioBufferClear => "input_audio_buffer.clear",
    RealtimeClientEventConversationItemCreate => "conversation.item.create",
    RealtimeClientEventConversationItemRetrieve => "conversation.item.retrieve",
    RealtimeClientEventConversationItemTruncate => "conversation.item.truncate",
    RealtimeClientEventConversationItemDelete => "conversation.item.delete",
    RealtimeClientEventResponseCreate => "response.create",
    RealtimeClientEventResponseCancel => "response.cancel",
    RealtimeClientEventOutputAudioBufferClear => "output_audio_buffer.clear",
}
360
#[cfg(feature = "_api")]
impl crate::traits::EventType for RealtimeClientEvent {
    /// Delegates to the wrapped event struct's `event_type` implementation.
    fn event_type(&self) -> &'static str {
        use RealtimeClientEvent::*;
        match self {
            SessionUpdate(e) => e.event_type(),
            InputAudioBufferAppend(e) => e.event_type(),
            InputAudioBufferCommit(e) => e.event_type(),
            InputAudioBufferClear(e) => e.event_type(),
            ConversationItemCreate(e) => e.event_type(),
            ConversationItemRetrieve(e) => e.event_type(),
            ConversationItemTruncate(e) => e.event_type(),
            ConversationItemDelete(e) => e.event_type(),
            ResponseCreate(e) => e.event_type(),
            ResponseCancel(e) => e.event_type(),
            OutputAudioBufferClear(e) => e.event_type(),
        }
    }
}