outfox-openai 0.7.0

Openai for outfox
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
use serde::{Deserialize, Serialize};

use crate::spec::realtime::{RealtimeConversationItem, RealtimeResponseCreateParams, Session};

/// Payload of the `session.update` client event.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct RealtimeClientEventSessionUpdate {
    /// Client-chosen identifier for this event (optional).
    ///
    /// Any string may be used. It is echoed back when the event results in
    /// an error; the matching `session.updated` event does not carry it.
    /// The field is left out of the serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
    /// The session configuration to apply: either a realtime session or a
    /// transcription session.
    pub session: Session,
}

/// Payload of the `input_audio_buffer.append` client event.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct RealtimeClientEventInputAudioBufferAppend {
    /// Client-chosen identifier for this event (optional); left out of the
    /// serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
    /// Audio bytes, Base64-encoded. The audio must use the format named by
    /// the `input_audio_format` field of the session configuration.
    pub audio: String,
}

/// Payload of the `input_audio_buffer.commit` client event.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct RealtimeClientEventInputAudioBufferCommit {
    /// Client-chosen identifier for this event (optional); left out of the
    /// serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
}

/// Payload of the `input_audio_buffer.clear` client event.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct RealtimeClientEventInputAudioBufferClear {
    /// Client-chosen identifier for this event (optional); left out of the
    /// serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
}

/// Payload of the `conversation.item.create` client event.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct RealtimeClientEventConversationItemCreate {
    /// Client-chosen identifier for this event (optional); left out of the
    /// serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// ID of the item the new item should be inserted after.
    ///
    /// * Unset: the new item is appended to the end of the conversation.
    /// * `root`: the new item is placed at the beginning of the conversation.
    /// * An existing ID: the new item is inserted mid-conversation, after
    ///   that item.
    ///
    /// An ID that cannot be found produces an error and the item is not added.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub previous_item_id: Option<String>,

    /// The Realtime conversation item to add.
    pub item: RealtimeConversationItem,
}

/// Payload of the `conversation.item.retrieve` client event.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct RealtimeClientEventConversationItemRetrieve {
    /// Client-chosen identifier for this event (optional); left out of the
    /// serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// ID of the conversation item to retrieve.
    pub item_id: String,
}

/// Payload of the `conversation.item.truncate` client event.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct RealtimeClientEventConversationItemTruncate {
    /// Client-chosen identifier for this event (optional); left out of the
    /// serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// ID of the assistant message item to truncate. Truncation is only
    /// possible for assistant message items.
    pub item_id: String,

    /// Index of the content part to truncate. Set this to `0`.
    pub content_index: u32,

    /// Point, in milliseconds and inclusive, up to which the audio is kept.
    /// The server responds with an error if `audio_end_ms` is greater than
    /// the actual audio duration.
    pub audio_end_ms: u32,
}

/// Payload of the `conversation.item.delete` client event.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct RealtimeClientEventConversationItemDelete {
    /// Client-chosen identifier for this event (optional); left out of the
    /// serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// ID of the conversation item to delete.
    pub item_id: String,
}

/// Payload of the `response.create` client event.
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct RealtimeClientEventResponseCreate {
    /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// Create a new Realtime response with these parameters.
    ///
    /// Like every other optional field in this file, the key is omitted from
    /// the serialized event when the value is `None` rather than being sent
    /// as an explicit `null`. Deserialization is unaffected: a missing key
    /// and a `null` value both yield `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response: Option<RealtimeResponseCreateParams>,
}

/// Payload of the `response.cancel` client event.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct RealtimeClientEventResponseCancel {
    /// Client-chosen identifier for this event (optional); left out of the
    /// serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,

    /// ID of the specific response to cancel. When it is not provided, an
    /// in-progress response in the default conversation is cancelled.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_id: Option<String>,
}

/// Payload of the `output_audio_buffer.clear` client event.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct RealtimeClientEventOutputAudioBufferClear {
    /// Client-chosen identifier for this event (optional); left out of the
    /// serialized form when it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub event_id: Option<String>,
}

/// These are events that the OpenAI Realtime WebSocket server will accept from the client.
///
/// The enum is internally tagged: each variant is serialized as its payload's fields plus a
/// `type` key holding the event name given in the variant's `rename` attribute.
///
/// `Clone` is derived because every payload type wrapped by a variant is itself `Clone`.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(tag = "type")]
pub enum RealtimeClientEvent {
    /// Send this event to update the session's configuration. The client may send this event at any time to update any field
    /// except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.
    ///
    /// When the server receives a `session.update`, it will respond with a `session.updated` event showing the full, effective
    /// configuration. Only the fields that are present in the `session.update` are updated. To clear a field like `instructions`,
    /// pass an empty string. To clear a field like `tools`, pass an empty array. To clear a field like `turn_detection`, pass `null`.
    #[serde(rename = "session.update")]
    SessionUpdate(RealtimeClientEventSessionUpdate),

    /// Send this event to append audio bytes to the input audio buffer. The audio buffer is temporary storage you can write to and later commit.
    /// A "commit" will create a new user message item in the conversation history from the buffer content and clear the buffer. Input audio
    /// transcription (if enabled) will be generated when the buffer is committed.
    ///
    /// If VAD is enabled the audio buffer is used to detect speech and the server will decide when to commit. When Server VAD is disabled,
    /// you must commit the audio buffer manually. Input audio noise reduction operates on writes to the audio buffer.
    ///
    /// The client may choose how much audio to place in each event up to a maximum of 15 MiB, for example streaming smaller chunks from the
    /// client may allow the VAD to be more responsive. Unlike most other client events, the server will not send a confirmation response to
    /// this event.
    #[serde(rename = "input_audio_buffer.append")]
    InputAudioBufferAppend(RealtimeClientEventInputAudioBufferAppend),

    /// Send this event to commit the user input audio buffer, which will create a new user message item in the conversation.
    /// This event will produce an error if the input audio buffer is empty.
    /// When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.
    /// Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model.
    /// The server will respond with an `input_audio_buffer.committed` event.
    #[serde(rename = "input_audio_buffer.commit")]
    InputAudioBufferCommit(RealtimeClientEventInputAudioBufferCommit),

    /// Send this event to clear the audio bytes in the buffer.
    /// The server will respond with an `input_audio_buffer.cleared` event.
    #[serde(rename = "input_audio_buffer.clear")]
    InputAudioBufferClear(RealtimeClientEventInputAudioBufferClear),

    /// Add a new Item to the Conversation's context, including messages, function calls, and function call responses.
    /// This event can be used both to populate a "history" of the conversation and to add new items mid-stream,
    /// but has the current limitation that it cannot populate assistant audio messages.
    ///
    /// If successful, the server will respond with a `conversation.item.created` event, otherwise an `error` event will be sent.
    #[serde(rename = "conversation.item.create")]
    ConversationItemCreate(RealtimeClientEventConversationItemCreate),

    /// Send this event when you want to retrieve the server's representation of a specific item in the conversation history.
    /// This is useful, for example, to inspect user audio after noise cancellation and VAD.
    /// The server will respond with a `conversation.item.retrieved` event, unless the item does not exist in the conversation history,
    /// in which case the server will respond with an error.
    #[serde(rename = "conversation.item.retrieve")]
    ConversationItemRetrieve(RealtimeClientEventConversationItemRetrieve),

    /// Send this event to truncate a previous assistant message's audio. The server will produce audio faster than realtime,
    /// so this event is useful when the user interrupts to truncate audio that has already been sent to the client but not
    /// yet played. This will synchronize the server's understanding of the audio with the client's playback.
    ///
    /// Truncating audio will delete the server-side text transcript to ensure there is not text in the context that hasn't
    /// been heard by the user.
    ///
    /// If successful, the server will respond with a `conversation.item.truncated` event.
    #[serde(rename = "conversation.item.truncate")]
    ConversationItemTruncate(RealtimeClientEventConversationItemTruncate),

    /// Send this event when you want to remove any item from the conversation history. The server will respond with a
    /// `conversation.item.deleted` event, unless the item does not exist in the conversation history, in which case the
    /// server will respond with an error.
    #[serde(rename = "conversation.item.delete")]
    ConversationItemDelete(RealtimeClientEventConversationItemDelete),

    /// This event instructs the server to create a Response, which means triggering model inference.
    /// When in Server VAD mode, the server will create Responses automatically.
    ///
    /// A Response will include at least one Item, and may have two, in which case the second will be a function call.
    /// These Items will be appended to the conversation history by default.
    ///
    /// The server will respond with a `response.created` event, events for Items and content created, and finally a
    /// `response.done` event to indicate the Response is complete.
    ///
    /// The `response.create` event includes inference configuration like `instructions` and `tools`. If these are set, they will
    /// override the Session's configuration for this Response only.
    ///
    /// Responses can be created out-of-band of the default Conversation, meaning that they can have arbitrary input, and
    /// it's possible to disable writing the output to the Conversation. Only one Response can write to the default
    /// Conversation at a time, but otherwise multiple Responses can be created in parallel. The `metadata` field is a good
    /// way to disambiguate multiple simultaneous Responses.
    ///
    /// Clients can set `conversation` to `none` to create a Response that does not write to the default Conversation.
    /// Arbitrary input can be provided with the `input` field, which is an array accepting raw Items and references to
    /// existing Items.
    #[serde(rename = "response.create")]
    ResponseCreate(RealtimeClientEventResponseCreate),

    /// Send this event to cancel an in-progress response. The server will respond with a `response.done` event
    /// with a status of `response.status=cancelled`. If there is no response to cancel, the server will respond
    /// with an error. It's safe to call `response.cancel` even if no response is in progress: an error will be
    /// returned and the session will remain unaffected.
    #[serde(rename = "response.cancel")]
    ResponseCancel(RealtimeClientEventResponseCancel),

    /// **WebRTC/SIP Only:** Emit to cut off the current audio response.
    /// This will trigger the server to stop generating audio and emit an `output_audio_buffer.cleared` event.
    /// This event should be preceded by a `response.cancel` client event to stop the generation of the current response.
    /// [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc)
    #[serde(rename = "output_audio_buffer.clear")]
    OutputAudioBufferClear(RealtimeClientEventOutputAudioBufferClear),
}

/// Renders a client event as its JSON text via `serde_json::to_string`.
///
/// # Panics
///
/// Panics if `serde_json::to_string` returns an error, because the result is
/// unwrapped rather than propagated.
impl From<&RealtimeClientEvent> for String {
    fn from(event: &RealtimeClientEvent) -> Self {
        let encoded = serde_json::to_string(event);
        encoded.unwrap()
    }
}

// Generates `impl From<$payload> for $event` that wraps the payload in the
// tuple variant `$variant` of `$event`.
macro_rules! event_from {
    ($payload:ty, $event:ty, $variant:ident) => {
        impl From<$payload> for $event {
            fn from(inner: $payload) -> Self {
                Self::$variant(inner)
            }
        }
    };
}

// `From<payload> for RealtimeClientEvent`, one impl per variant, listed in
// the same order as the variants of `RealtimeClientEvent`.
event_from!(
    RealtimeClientEventSessionUpdate,
    RealtimeClientEvent,
    SessionUpdate
);
event_from!(
    RealtimeClientEventInputAudioBufferAppend,
    RealtimeClientEvent,
    InputAudioBufferAppend
);
event_from!(
    RealtimeClientEventInputAudioBufferCommit,
    RealtimeClientEvent,
    InputAudioBufferCommit
);
event_from!(
    RealtimeClientEventInputAudioBufferClear,
    RealtimeClientEvent,
    InputAudioBufferClear
);
event_from!(
    RealtimeClientEventConversationItemCreate,
    RealtimeClientEvent,
    ConversationItemCreate
);
event_from!(
    RealtimeClientEventConversationItemRetrieve,
    RealtimeClientEvent,
    ConversationItemRetrieve
);
event_from!(
    RealtimeClientEventConversationItemTruncate,
    RealtimeClientEvent,
    ConversationItemTruncate
);
event_from!(
    RealtimeClientEventConversationItemDelete,
    RealtimeClientEvent,
    ConversationItemDelete
);
event_from!(
    RealtimeClientEventResponseCreate,
    RealtimeClientEvent,
    ResponseCreate
);
event_from!(
    RealtimeClientEventResponseCancel,
    RealtimeClientEvent,
    ResponseCancel
);
event_from!(
    RealtimeClientEventOutputAudioBufferClear,
    RealtimeClientEvent,
    OutputAudioBufferClear
);

/// Builds a `conversation.item.create` payload around a conversation item,
/// leaving both `event_id` and `previous_item_id` unset.
impl From<RealtimeConversationItem> for RealtimeClientEventConversationItemCreate {
    fn from(item: RealtimeConversationItem) -> Self {
        Self {
            item,
            previous_item_id: None,
            event_id: None,
        }
    }
}

/// Converts a client event into a WebSocket text message whose content is
/// the event's JSON text, as produced by `String::from(&event)`.
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
impl From<RealtimeClientEvent> for tokio_tungstenite::tungstenite::Message {
    fn from(event: RealtimeClientEvent) -> Self {
        let json = String::from(&event);
        Self::Text(json.into())
    }
}

// Generates `impl From<$payload> for tungstenite::Message` in two hops:
// the payload is first converted into `$event`, and that event is then
// converted into the message.
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
macro_rules! message_from_event {
    ($payload:ty, $event:ty) => {
        impl From<$payload> for tokio_tungstenite::tungstenite::Message {
            fn from(inner: $payload) -> Self {
                let event: $event = inner.into();
                event.into()
            }
        }
    };
}

// `From<payload> for tungstenite::Message`, one impl per payload type,
// listed in the same order as the variants of `RealtimeClientEvent`.
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(RealtimeClientEventSessionUpdate, RealtimeClientEvent);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(
    RealtimeClientEventInputAudioBufferAppend,
    RealtimeClientEvent
);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(
    RealtimeClientEventInputAudioBufferCommit,
    RealtimeClientEvent
);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(
    RealtimeClientEventInputAudioBufferClear,
    RealtimeClientEvent
);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(
    RealtimeClientEventConversationItemCreate,
    RealtimeClientEvent
);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(
    RealtimeClientEventConversationItemRetrieve,
    RealtimeClientEvent
);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(
    RealtimeClientEventConversationItemTruncate,
    RealtimeClientEvent
);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(
    RealtimeClientEventConversationItemDelete,
    RealtimeClientEvent
);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(RealtimeClientEventResponseCreate, RealtimeClientEvent);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(RealtimeClientEventResponseCancel, RealtimeClientEvent);
#[cfg(all(feature = "_api", not(target_family = "wasm")))]
message_from_event!(
    RealtimeClientEventOutputAudioBufferClear,
    RealtimeClientEvent
);

// Generates one `crate::traits::EventType` impl per `Type => "name"` pair;
// each impl's `event_type` returns the given expression unchanged.
// A trailing comma after the last pair is accepted.
#[cfg(feature = "_api")]
macro_rules! impl_event_type {
    ($($event:ty => $name:expr),* $(,)?) => {
        $(
            impl crate::traits::EventType for $event {
                fn event_type(&self) -> &'static str {
                    $name
                }
            }
        )*
    };
}

// Event-type name for each payload struct. The names match the `rename`
// values on the corresponding `RealtimeClientEvent` variants.
#[cfg(feature = "_api")]
impl_event_type! {
    // Session
    RealtimeClientEventSessionUpdate => "session.update",

    // Input audio buffer
    RealtimeClientEventInputAudioBufferAppend => "input_audio_buffer.append",
    RealtimeClientEventInputAudioBufferCommit => "input_audio_buffer.commit",
    RealtimeClientEventInputAudioBufferClear => "input_audio_buffer.clear",

    // Conversation items
    RealtimeClientEventConversationItemCreate => "conversation.item.create",
    RealtimeClientEventConversationItemRetrieve => "conversation.item.retrieve",
    RealtimeClientEventConversationItemTruncate => "conversation.item.truncate",
    RealtimeClientEventConversationItemDelete => "conversation.item.delete",

    // Responses
    RealtimeClientEventResponseCreate => "response.create",
    RealtimeClientEventResponseCancel => "response.cancel",

    // Output audio buffer
    RealtimeClientEventOutputAudioBufferClear => "output_audio_buffer.clear",
}

/// Reports the event type of whichever payload the enum currently holds by
/// delegating to that payload's own `event_type`.
#[cfg(feature = "_api")]
impl crate::traits::EventType for RealtimeClientEvent {
    fn event_type(&self) -> &'static str {
        // No wildcard arm: a newly added variant must be handled here
        // before the crate compiles again.
        match self {
            Self::SessionUpdate(payload) => payload.event_type(),
            Self::InputAudioBufferAppend(payload) => payload.event_type(),
            Self::InputAudioBufferCommit(payload) => payload.event_type(),
            Self::InputAudioBufferClear(payload) => payload.event_type(),
            Self::ConversationItemCreate(payload) => payload.event_type(),
            Self::ConversationItemRetrieve(payload) => payload.event_type(),
            Self::ConversationItemTruncate(payload) => payload.event_type(),
            Self::ConversationItemDelete(payload) => payload.event_type(),
            Self::ResponseCreate(payload) => payload.event_type(),
            Self::ResponseCancel(payload) => payload.event_type(),
            Self::OutputAudioBufferClear(payload) => payload.event_type(),
        }
    }
}