1use std::collections::HashMap;
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8
9use crate::common::{Redacted, ResponsePrompt, ToolReference};
10
11#[serde_with::skip_serializing_none]
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct RealtimeSessionCreateRequest {
18 #[serde(rename = "type")]
19 pub r#type: RealtimeSessionType,
20 pub audio: Option<RealtimeAudioConfig>,
21 pub include: Option<Vec<RealtimeIncludeOption>>,
22 pub instructions: Option<String>,
23 pub max_output_tokens: Option<MaxOutputTokens>,
24 pub model: Option<String>,
25 #[serde(default = "audio")]
26 pub output_modalities: Option<Vec<OutputModality>>,
27 pub prompt: Option<ResponsePrompt>,
28 pub tool_choice: Option<RealtimeToolChoiceConfig>,
29 pub tools: Option<RealtimeToolsConfig>,
30 pub tracing: Option<RealtimeTracingConfig>,
31 pub truncation: Option<RealtimeTruncation>,
32}
33
34#[serde_with::skip_serializing_none]
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct RealtimeSessionCreateResponse {
41 pub client_secret: RealtimeSessionClientSecret,
42 #[serde(rename = "type")]
43 pub r#type: RealtimeSessionType,
44 pub audio: Option<RealtimeAudioConfig>,
45 pub include: Option<Vec<RealtimeIncludeOption>>,
46 pub instructions: Option<String>,
47 pub max_output_tokens: Option<MaxOutputTokens>,
48 pub model: Option<String>,
49 #[serde(default = "audio")]
50 pub output_modalities: Option<Vec<OutputModality>>,
51 pub prompt: Option<ResponsePrompt>,
52 pub tool_choice: Option<RealtimeToolChoiceConfig>,
53 pub tools: Option<Vec<RealtimeToolsConfig>>,
54 pub tracing: Option<RealtimeTracingConfig>,
55 pub truncation: Option<RealtimeTruncation>,
56}
57
58#[serde_with::skip_serializing_none]
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct RealtimeTranscriptionSessionCreateRequest {
65 #[serde(rename = "type")]
66 pub r#type: RealtimeTranscriptionSessionType,
67 pub audio: Option<RealtimeTranscriptionSessionAudio>,
68 pub include: Option<Vec<RealtimeIncludeOption>>,
69}
70
71#[serde_with::skip_serializing_none]
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct RealtimeTranscriptionSessionCreateResponse {
78 pub id: String,
79 pub object: String,
80 #[serde(rename = "type")]
81 pub r#type: RealtimeTranscriptionSessionType,
82 pub audio: Option<RealtimeTranscriptionSessionResponseAudio>,
83 pub expires_at: Option<i64>,
84 pub include: Option<Vec<RealtimeIncludeOption>>,
85}
86
87#[derive(Debug, Clone, Serialize, Deserialize)]
92#[serde(tag = "type")]
93pub enum RealtimeAudioFormats {
94 #[serde(rename = "audio/pcm")]
95 Pcm {
96 #[serde(skip_serializing_if = "Option::is_none")]
98 rate: Option<u32>,
99 },
100 #[serde(rename = "audio/pcmu")]
101 Pcmu,
102 #[serde(rename = "audio/pcma")]
103 Pcma,
104}
105
106#[serde_with::skip_serializing_none]
111#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct AudioTranscription {
113 pub language: Option<String>,
114 pub model: Option<String>,
115 pub prompt: Option<String>,
116}
117
118#[derive(Debug, Clone, Serialize, Deserialize)]
123#[serde(rename_all = "snake_case")]
124pub enum NoiseReductionType {
125 NearField,
126 FarField,
127}
128
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct NoiseReduction {
131 #[serde(rename = "type")]
132 pub r#type: NoiseReductionType,
133}
134
135#[derive(Debug, Clone, Serialize, Deserialize, Default)]
142#[serde(rename_all = "snake_case")]
143pub enum SemanticVadEagerness {
144 Low,
145 Medium,
146 High,
147 #[default]
148 Auto,
149}
150
151#[serde_with::skip_serializing_none]
152#[derive(Debug, Clone, Serialize, Deserialize)]
153#[serde(tag = "type")]
154pub enum TurnDetection {
155 #[serde(rename = "server_vad")]
156 ServerVad {
157 create_response: Option<bool>,
158 idle_timeout_ms: Option<u32>,
159 interrupt_response: Option<bool>,
160 prefix_padding_ms: Option<u32>,
161 silence_duration_ms: Option<u32>,
162 threshold: Option<f64>,
163 },
164 #[serde(rename = "semantic_vad")]
165 SemanticVad {
166 create_response: Option<bool>,
167 eagerness: Option<SemanticVadEagerness>,
168 interrupt_response: Option<bool>,
169 },
170}
171
172#[serde_with::skip_serializing_none]
174#[derive(Debug, Clone, Serialize, Deserialize)]
175#[serde(tag = "type")]
176pub enum RealtimeTranscriptionSessionTurnDetection {
177 #[serde(rename = "server_vad")]
178 ServerVad {
179 prefix_padding_ms: Option<u32>,
180 silence_duration_ms: Option<u32>,
181 threshold: Option<f64>,
182 },
183}
184
185#[derive(Debug, Clone, Serialize, Deserialize)]
196#[serde(untagged)]
197pub enum Voice {
198 VoiceIDsShared(String),
199 Custom { id: String },
200}
201
202#[derive(Debug, Clone, Serialize, Deserialize)]
207#[serde(rename_all = "snake_case")]
208pub enum OutputModality {
209 Text,
210 Audio,
211}
212
213#[expect(
214 clippy::unnecessary_wraps,
215 reason = "must return Option to match serde default field type"
216)]
217fn audio() -> Option<Vec<OutputModality>> {
218 Some(vec![OutputModality::Audio])
219}
220
221#[serde_with::skip_serializing_none]
226#[derive(Debug, Clone, Serialize, Deserialize)]
227pub struct TracingConfig {
228 pub group_id: Option<String>,
229 pub metadata: Option<Value>,
230 pub workflow_name: Option<String>,
231}
232
233#[derive(Debug, Clone, Serialize, Deserialize)]
235pub enum TracingMode {
236 #[serde(rename = "auto")]
237 Auto,
238}
239
240#[derive(Debug, Clone, Serialize, Deserialize)]
242#[serde(untagged)]
243pub enum RealtimeTracingConfig {
244 Mode(TracingMode),
245 Config(TracingConfig),
246}
247
248#[derive(Debug, Clone, Serialize, Deserialize)]
253#[serde(rename_all = "snake_case")]
254#[expect(
255 clippy::enum_variant_names,
256 reason = "variant names match OpenAI Realtime API spec"
257)]
258pub enum ConnectorId {
259 ConnectorDropbox,
260 ConnectorGmail,
261 ConnectorGooglecalendar,
262 ConnectorGoogledrive,
263 ConnectorMicrosoftteams,
264 ConnectorOutlookcalendar,
265 ConnectorOutlookemail,
266 ConnectorSharepoint,
267}
268
269#[serde_with::skip_serializing_none]
274#[derive(Debug, Clone, Serialize, Deserialize)]
275#[serde(tag = "type")]
276pub enum RealtimeToolsConfig {
277 #[serde(rename = "function")]
278 RealtimeFunctionTool {
279 description: Option<String>,
280 name: Option<String>,
281 parameters: Option<Value>,
282 },
283 #[serde(rename = "mcp")]
284 McpTool {
285 server_label: String,
286 allowed_tools: Option<McpAllowedTools>,
287 authorization: Option<Redacted>,
288 connector_id: Option<ConnectorId>,
289 headers: Option<HashMap<String, Redacted>>,
290 require_approval: Option<McpToolApproval>,
291 server_description: Option<String>,
292 server_url: Option<String>,
293 },
294}
295
296#[derive(Debug, Clone, Serialize, Deserialize)]
305#[serde(untagged)]
306pub enum McpAllowedTools {
307 List(Vec<String>),
308 Filter(McpToolFilter),
309}
310
311#[serde_with::skip_serializing_none]
313#[derive(Debug, Clone, Serialize, Deserialize)]
314pub struct McpToolFilter {
315 pub read_only: Option<bool>,
316 pub tool_names: Option<Vec<String>>,
317}
318
319#[derive(Debug, Clone, Serialize, Deserialize)]
324#[serde(untagged)]
325pub enum McpToolApproval {
326 Setting(McpToolApprovalSetting),
327 Filter(McpToolApprovalFilter),
328}
329
330#[derive(Debug, Clone, Serialize, Deserialize)]
332#[serde(rename_all = "snake_case")]
333pub enum McpToolApprovalSetting {
334 Always,
335 Never,
336}
337
338#[serde_with::skip_serializing_none]
340#[derive(Debug, Clone, Serialize, Deserialize)]
341pub struct McpToolApprovalFilter {
342 pub always: Option<McpToolFilter>,
343 pub never: Option<McpToolFilter>,
344}
345
346#[derive(Debug, Clone, Serialize, Deserialize)]
356#[serde(untagged)]
357pub enum RealtimeToolChoiceConfig {
358 Options(ToolChoiceOptions),
359 Reference(ToolReference),
360}
361
362#[derive(Debug, Clone, Serialize, Deserialize)]
364#[serde(rename_all = "snake_case")]
365pub enum ToolChoiceOptions {
366 None,
367 Auto,
368 Required,
369}
370
371#[derive(Debug, Clone, Serialize, Deserialize)]
382#[serde(untagged)]
383pub enum MaxOutputTokens {
384 Integer(u32),
386 Inf(InfMarker),
387}
388
389impl Default for MaxOutputTokens {
390 fn default() -> Self {
391 Self::Inf(InfMarker::Inf)
392 }
393}
394
395#[derive(Debug, Clone, Serialize, Deserialize)]
397pub enum InfMarker {
398 #[serde(rename = "inf")]
399 Inf,
400}
401
402#[serde_with::skip_serializing_none]
407#[derive(Debug, Clone, Serialize, Deserialize)]
408pub struct TruncationTokenLimits {
409 pub post_instructions: Option<u32>,
410}
411
412#[derive(Debug, Clone, Serialize, Deserialize)]
414pub enum RetentionRatioTruncationType {
415 #[serde(rename = "retention_ratio")]
416 RetentionRatio,
417}
418
419#[serde_with::skip_serializing_none]
420#[derive(Debug, Clone, Serialize, Deserialize)]
421pub struct RetentionRatioTruncation {
422 pub retention_ratio: f64,
423 #[serde(rename = "type")]
424 pub r#type: RetentionRatioTruncationType,
425 pub token_limits: Option<TruncationTokenLimits>,
426}
427
428#[derive(Debug, Clone, Serialize, Deserialize, Default)]
430#[serde(rename_all = "snake_case")]
431pub enum TruncationMode {
432 #[default]
433 Auto,
434 Disabled,
435}
436
437#[derive(Debug, Clone, Serialize, Deserialize)]
439#[serde(untagged)]
440pub enum RealtimeTruncation {
441 Mode(TruncationMode),
442 RetentionRatio(RetentionRatioTruncation),
443}
444
445#[derive(Debug, Clone, Serialize, Deserialize)]
450pub struct RealtimeSessionClientSecret {
451 pub expires_at: i64,
452 pub value: Redacted,
453}
454
455#[serde_with::skip_serializing_none]
460#[derive(Debug, Clone, Serialize, Deserialize)]
461pub struct RealtimeAudioConfigInput {
462 pub format: Option<RealtimeAudioFormats>,
463 pub noise_reduction: Option<NoiseReduction>,
464 pub transcription: Option<AudioTranscription>,
465 pub turn_detection: Option<TurnDetection>,
466}
467
468#[serde_with::skip_serializing_none]
469#[derive(Debug, Clone, Serialize, Deserialize)]
470pub struct RealtimeAudioConfigOutput {
471 pub format: Option<RealtimeAudioFormats>,
472 pub speed: Option<f64>,
473 pub voice: Option<Voice>,
474}
475
476#[serde_with::skip_serializing_none]
477#[derive(Debug, Clone, Serialize, Deserialize)]
478pub struct RealtimeAudioConfig {
479 pub input: Option<RealtimeAudioConfigInput>,
480 pub output: Option<RealtimeAudioConfigOutput>,
481}
482
483#[serde_with::skip_serializing_none]
484#[derive(Debug, Clone, Serialize, Deserialize)]
485pub struct RealtimeTranscriptionSessionAudio {
486 pub input: Option<RealtimeAudioConfigInput>,
487}
488
489#[serde_with::skip_serializing_none]
490#[derive(Debug, Clone, Serialize, Deserialize)]
491pub struct RealtimeTranscriptionSessionResponseAudio {
492 pub input: Option<RealtimeTranscriptionSessionResponseAudioConfigInput>,
493}
494
495#[serde_with::skip_serializing_none]
496#[derive(Debug, Clone, Serialize, Deserialize)]
497pub struct RealtimeTranscriptionSessionResponseAudioConfigInput {
498 pub format: Option<RealtimeAudioFormats>,
499 pub noise_reduction: Option<NoiseReduction>,
500 pub transcription: Option<AudioTranscription>,
501 pub turn_detection: Option<RealtimeTranscriptionSessionTurnDetection>,
502}
503
504#[derive(Debug, Clone, Serialize, Deserialize)]
509pub enum RealtimeIncludeOption {
510 #[serde(rename = "item.input_audio_transcription.logprobs")]
511 InputAudioTranscriptionLogprobs,
512}
513
514#[derive(Debug, Clone, Serialize, Deserialize)]
520pub enum RealtimeSessionType {
521 #[serde(rename = "realtime")]
522 Realtime,
523}
524
525#[derive(Debug, Clone, Serialize, Deserialize)]
531pub enum RealtimeTranscriptionSessionType {
532 #[serde(rename = "transcription")]
533 Transcription,
534}