1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
use serde::{Deserialize, Serialize};
use crate::spec::mcp::MCPListToolsTool;
use crate::spec::realtime::{ErrorCodeMessage, ErrorMessage};
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct SystemMessageContent {
/// The text content.
pub text: String,
/// The content type. Always `input_text` for system messages.
pub kind: String,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeConversationItemMessageSystem {
/// The content of the message.
pub content: Vec<SystemMessageContent>,
/// The unique ID of the item. This may be provided by the client or generated by the server.
pub id: Option<String>,
/// Identifier for the API object being returned - always `realtime.item`.
/// Optional when creating a new item.
pub object: Option<String>,
/// The status of the item. Has no effect on the conversation.
pub status: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct UserMessageContentInputText {
/// The text content (for `input_text`).
pub text: String,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct UserMessageContentInputAudio {
/// Base64-encoded audio bytes (for `input_audio`), these will be parsed as the
/// format specified in the session input audio type configuration.
/// This defaults to PCM 16-bit 24kHz mono if not specified.
pub audio: String,
/// Transcript of the audio (for `input_audio`). This is not sent to the model,
/// but will be attached to the message item for reference.
pub transcript: String,
}
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
#[serde(rename_all = "snake_case")]
pub enum ImageDetail {
#[default]
Auto,
Low,
High,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct UserMessageContentInputImage {
/// Base64-encoded image bytes (for `input_image`) as a data URI.
/// For example `data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...`.
/// Supported formats are PNG and JPEG.
pub image_url: String,
/// The detail level of the image (for `input_image`). `auto` will default to `high`.
pub detail: ImageDetail,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(tag = "type")]
#[serde(rename_all = "snake_case")]
pub enum UserMessageContent {
InputText(UserMessageContentInputText),
InputAudio(UserMessageContentInputAudio),
InputImage(UserMessageContentInputImage),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeConversationItemMessageUser {
/// The content of the message.
pub content: Vec<UserMessageContent>,
/// The unique ID of the item. This may be provided by the client or generated by the server.
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
/// Identifier for the API object being returned - always `realtime.item`.
/// Optional when creating a new item.
#[serde(skip_serializing_if = "Option::is_none")]
pub object: Option<String>,
/// The status of the item. Has no effect on the conversation.
#[serde(skip_serializing_if = "Option::is_none")]
pub status: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AssistantMessageContentOutputText {
/// The text content
pub text: String,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AssistantMessageContentOutputAudio {
/// Base64-encoded audio bytes, these will be parsed as the format specified
/// in the session output audio type configuration. This defaults to PCM 16-bit
/// 24kHz mono if not specified.
pub audio: Option<String>,
/// The transcript of the audio content, this will always be present if the
/// output type is `audio`.
pub transcript: String,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(tag = "type")]
#[serde(rename_all = "snake_case")]
pub enum AssistantMessageContent {
OutputText(AssistantMessageContentOutputText),
OutputAudio(AssistantMessageContentOutputAudio),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeConversationItemMessageAssistant {
/// The content of the message.
pub content: Vec<AssistantMessageContent>,
/// The unique ID of the item. This may be provided by the client or generated by the server.
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
/// Identifier for the API object being returned - always `realtime.item`.
/// Optional when creating a new item.
#[serde(skip_serializing_if = "Option::is_none")]
pub object: Option<String>,
/// The status of the item. Has no effect on the conversation.
#[serde(skip_serializing_if = "Option::is_none")]
pub status: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(tag = "role")]
#[serde(rename_all = "lowercase")]
pub enum RealtimeConversationItemMessage {
System(RealtimeConversationItemMessageSystem),
User(RealtimeConversationItemMessageUser),
Assistant(RealtimeConversationItemMessageAssistant),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeConversationItemFunctionCall {
/// The arguments of the function call. This is a JSON-encoded string representing
/// the arguments passed to the function, for example {"arg1": "value1", "arg2": 42}.
pub arguments: String,
/// The name of the function being called.
pub name: String,
/// The ID of the function call.
pub call_id: String,
/// The unique ID of the item. This may be provided by the client or generated by the server.
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
/// Identifier for the API object being returned - always `realtime.item`.
/// Optional when creating a new item.
#[serde(skip_serializing_if = "Option::is_none")]
pub object: Option<String>,
/// The status of the item. Has no effect on the conversation.
pub status: String,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeConversationItemFunctionCallOutput {
/// The ID of the function call this output is for.
pub call_id: String,
/// The output of the function call, this is free text and can contain any information
/// or simply be empty.
pub output: String,
/// The unique ID of the item. This may be provided by the client or generated by the server.
#[serde(skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
/// Identifier for the API object being returned - always `realtime.item`.
/// Optional when creating a new item.
#[serde(skip_serializing_if = "Option::is_none")]
pub object: Option<String>,
/// The status of the item. Has no effect on the conversation.
pub status: String,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeMCPApprovalResponse {
/// The ID of the approval request being answered.
pub approval_request_id: String,
/// Whether the request was approved.
pub approved: bool,
/// The unique ID of the approval response.
pub id: String,
/// Optional reason for the decision.
pub reason: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeMCPListTools {
/// The label of the MCP server.
pub server_label: String,
/// The tools available on the server.
pub tools: Vec<MCPListToolsTool>,
/// The unique ID of the list.
pub id: String,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeMCPApprovalRequest {
/// A JSON string of arguments for the tool.
pub arguments: String,
/// The unique ID of the approval request.
pub id: String,
/// The name of the tool to run.
pub name: String,
/// The label of the MCP server making the request.
pub server_label: String,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeMCPProtocolError {}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RealtimeMCPToolCallError {
ProtocolError(ErrorCodeMessage),
ToolExecutionError(ErrorMessage),
HttpError(ErrorCodeMessage),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct RealtimeMCPToolCall {
/// A JSON string of the arguments passed to the tool.
pub arguments: String,
/// The unique ID of the tool call.
pub id: String,
/// The name of the tool that was run.
pub name: String,
/// The label of the MCP server running the tool.
pub server_label: String,
/// The ID of an associated approval request, if any.
pub approval_request_id: Option<String>,
/// The error from the tool call, if any.
pub error: Option<RealtimeMCPToolCallError>,
/// The output from the tool call.
pub output: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RealtimeConversationItem {
Message(RealtimeConversationItemMessage),
FunctionCall(RealtimeConversationItemFunctionCall),
FunctionCallOutput(RealtimeConversationItemFunctionCallOutput),
McpApprovalResponse(RealtimeMCPApprovalResponse),
McpListTools(RealtimeMCPListTools),
McpCall(RealtimeMCPToolCall),
McpApprovalRequest(RealtimeMCPApprovalRequest),
}
impl TryFrom<serde_json::Value> for RealtimeConversationItem {
type Error = serde_json::Error;
fn try_from(value: serde_json::Value) -> Result<Self, Self::Error> {
serde_json::from_value(value)
}
}