1use std::collections::HashMap;
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use validator::{Validate, ValidationError};
9
10use crate::{
11 common::{Redacted, ResponsePrompt, ToolReference},
12 validated::Normalizable,
13};
14
15#[serde_with::skip_serializing_none]
20#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
21#[validate(schema(function = "validate_session_create_request"))]
22pub struct RealtimeSessionCreateRequest {
23 #[serde(rename = "type")]
24 pub r#type: RealtimeSessionType,
25 pub audio: Option<RealtimeAudioConfig>,
26 pub include: Option<Vec<RealtimeIncludeOption>>,
27 pub instructions: Option<String>,
28 pub max_output_tokens: Option<MaxOutputTokens>,
29 pub model: Option<String>,
30 #[serde(default = "audio")]
31 pub output_modalities: Option<Vec<OutputModality>>,
32 pub prompt: Option<ResponsePrompt>,
33 pub tool_choice: Option<RealtimeToolChoiceConfig>,
34 pub tools: Option<RealtimeToolsConfig>,
35 pub tracing: Option<RealtimeTracingConfig>,
36 pub truncation: Option<RealtimeTruncation>,
37}
38
39impl Normalizable for RealtimeSessionCreateRequest {}
40
41fn validate_session_create_request(
42 req: &RealtimeSessionCreateRequest,
43) -> Result<(), ValidationError> {
44 let has_model = req.model.as_deref().is_some_and(|m| !m.trim().is_empty());
45 if !has_model {
46 return Err(ValidationError::new("model is required"));
47 }
48 Ok(())
49}
50
51#[serde_with::skip_serializing_none]
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct RealtimeSessionCreateResponse {
58 pub client_secret: RealtimeSessionClientSecret,
59 #[serde(rename = "type")]
60 pub r#type: RealtimeSessionType,
61 pub audio: Option<RealtimeAudioConfig>,
62 pub include: Option<Vec<RealtimeIncludeOption>>,
63 pub instructions: Option<String>,
64 pub max_output_tokens: Option<MaxOutputTokens>,
65 pub model: Option<String>,
66 #[serde(default = "audio")]
67 pub output_modalities: Option<Vec<OutputModality>>,
68 pub prompt: Option<ResponsePrompt>,
69 pub tool_choice: Option<RealtimeToolChoiceConfig>,
70 pub tools: Option<Vec<RealtimeToolsConfig>>,
71 pub tracing: Option<RealtimeTracingConfig>,
72 pub truncation: Option<RealtimeTruncation>,
73}
74
75#[serde_with::skip_serializing_none]
80#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
81#[validate(schema(function = "validate_transcription_session_create_request"))]
82pub struct RealtimeTranscriptionSessionCreateRequest {
83 #[serde(rename = "type")]
84 pub r#type: RealtimeTranscriptionSessionType,
85 pub audio: Option<RealtimeTranscriptionSessionAudio>,
86 pub include: Option<Vec<RealtimeIncludeOption>>,
87 pub model: Option<String>,
88 pub language: Option<String>,
89 pub prompt: Option<String>,
90}
91
92impl Normalizable for RealtimeTranscriptionSessionCreateRequest {
93 }
95
96fn validate_transcription_session_create_request(
97 req: &RealtimeTranscriptionSessionCreateRequest,
98) -> Result<(), ValidationError> {
99 let has_model = req.model.as_deref().is_some_and(|m| !m.trim().is_empty());
100 if !has_model {
101 return Err(ValidationError::new("model is required"));
102 }
103 Ok(())
104}
105
106#[serde_with::skip_serializing_none]
111#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct RealtimeTranscriptionSessionCreateResponse {
113 pub id: String,
114 pub object: String,
115 #[serde(rename = "type")]
116 pub r#type: RealtimeTranscriptionSessionType,
117 pub audio: Option<RealtimeTranscriptionSessionResponseAudio>,
118 pub expires_at: Option<i64>,
119 pub include: Option<Vec<RealtimeIncludeOption>>,
120}
121
122#[derive(Debug, Clone, Serialize, Deserialize)]
127#[serde(tag = "type")]
128pub enum RealtimeAudioFormats {
129 #[serde(rename = "audio/pcm")]
130 Pcm {
131 #[serde(skip_serializing_if = "Option::is_none")]
133 rate: Option<u32>,
134 },
135 #[serde(rename = "audio/pcmu")]
136 Pcmu,
137 #[serde(rename = "audio/pcma")]
138 Pcma,
139}
140
141#[serde_with::skip_serializing_none]
146#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct AudioTranscription {
148 pub language: Option<String>,
149 pub model: Option<String>,
150 pub prompt: Option<String>,
151}
152
153#[derive(Debug, Clone, Serialize, Deserialize)]
158#[serde(rename_all = "snake_case")]
159pub enum NoiseReductionType {
160 NearField,
161 FarField,
162}
163
164#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct NoiseReduction {
166 #[serde(rename = "type")]
167 pub r#type: NoiseReductionType,
168}
169
170#[derive(Debug, Clone, Serialize, Deserialize, Default)]
177#[serde(rename_all = "snake_case")]
178pub enum SemanticVadEagerness {
179 Low,
180 Medium,
181 High,
182 #[default]
183 Auto,
184}
185
186#[serde_with::skip_serializing_none]
187#[derive(Debug, Clone, Serialize, Deserialize)]
188#[serde(tag = "type")]
189pub enum TurnDetection {
190 #[serde(rename = "server_vad")]
191 ServerVad {
192 create_response: Option<bool>,
193 idle_timeout_ms: Option<u32>,
194 interrupt_response: Option<bool>,
195 prefix_padding_ms: Option<u32>,
196 silence_duration_ms: Option<u32>,
197 threshold: Option<f64>,
198 },
199 #[serde(rename = "semantic_vad")]
200 SemanticVad {
201 create_response: Option<bool>,
202 eagerness: Option<SemanticVadEagerness>,
203 interrupt_response: Option<bool>,
204 },
205}
206
207#[serde_with::skip_serializing_none]
209#[derive(Debug, Clone, Serialize, Deserialize)]
210#[serde(tag = "type")]
211pub enum RealtimeTranscriptionSessionTurnDetection {
212 #[serde(rename = "server_vad")]
213 ServerVad {
214 prefix_padding_ms: Option<u32>,
215 silence_duration_ms: Option<u32>,
216 threshold: Option<f64>,
217 },
218}
219
220#[derive(Debug, Clone, Serialize, Deserialize)]
231#[serde(untagged)]
232pub enum Voice {
233 VoiceIDsShared(String),
234 Custom { id: String },
235}
236
237#[derive(Debug, Clone, Serialize, Deserialize)]
242#[serde(rename_all = "snake_case")]
243pub enum OutputModality {
244 Text,
245 Audio,
246}
247
248#[expect(
249 clippy::unnecessary_wraps,
250 reason = "must return Option to match serde default field type"
251)]
252fn audio() -> Option<Vec<OutputModality>> {
253 Some(vec![OutputModality::Audio])
254}
255
256#[serde_with::skip_serializing_none]
261#[derive(Debug, Clone, Serialize, Deserialize)]
262pub struct TracingConfig {
263 pub group_id: Option<String>,
264 pub metadata: Option<Value>,
265 pub workflow_name: Option<String>,
266}
267
268#[derive(Debug, Clone, Serialize, Deserialize)]
270pub enum TracingMode {
271 #[serde(rename = "auto")]
272 Auto,
273}
274
275#[derive(Debug, Clone, Serialize, Deserialize)]
277#[serde(untagged)]
278pub enum RealtimeTracingConfig {
279 Mode(TracingMode),
280 Config(TracingConfig),
281}
282
283#[derive(Debug, Clone, Serialize, Deserialize)]
288#[serde(rename_all = "snake_case")]
289#[expect(
290 clippy::enum_variant_names,
291 reason = "variant names match OpenAI Realtime API spec"
292)]
293pub enum ConnectorId {
294 ConnectorDropbox,
295 ConnectorGmail,
296 ConnectorGooglecalendar,
297 ConnectorGoogledrive,
298 ConnectorMicrosoftteams,
299 ConnectorOutlookcalendar,
300 ConnectorOutlookemail,
301 ConnectorSharepoint,
302}
303
304#[serde_with::skip_serializing_none]
309#[derive(Debug, Clone, Serialize, Deserialize)]
310#[serde(tag = "type")]
311pub enum RealtimeToolsConfig {
312 #[serde(rename = "function")]
313 RealtimeFunctionTool {
314 description: Option<String>,
315 name: Option<String>,
316 parameters: Option<Value>,
317 },
318 #[serde(rename = "mcp")]
319 McpTool {
320 server_label: String,
321 allowed_tools: Option<McpAllowedTools>,
322 authorization: Option<Redacted>,
323 connector_id: Option<ConnectorId>,
324 headers: Option<HashMap<String, Redacted>>,
325 require_approval: Option<McpToolApproval>,
326 server_description: Option<String>,
327 server_url: Option<String>,
328 },
329}
330
331#[derive(Debug, Clone, Serialize, Deserialize)]
340#[serde(untagged)]
341pub enum McpAllowedTools {
342 List(Vec<String>),
343 Filter(McpToolFilter),
344}
345
346#[serde_with::skip_serializing_none]
348#[derive(Debug, Clone, Serialize, Deserialize)]
349pub struct McpToolFilter {
350 pub read_only: Option<bool>,
351 pub tool_names: Option<Vec<String>>,
352}
353
354#[derive(Debug, Clone, Serialize, Deserialize)]
359#[serde(untagged)]
360pub enum McpToolApproval {
361 Setting(McpToolApprovalSetting),
362 Filter(McpToolApprovalFilter),
363}
364
365#[derive(Debug, Clone, Serialize, Deserialize)]
367#[serde(rename_all = "snake_case")]
368pub enum McpToolApprovalSetting {
369 Always,
370 Never,
371}
372
373#[serde_with::skip_serializing_none]
375#[derive(Debug, Clone, Serialize, Deserialize)]
376pub struct McpToolApprovalFilter {
377 pub always: Option<McpToolFilter>,
378 pub never: Option<McpToolFilter>,
379}
380
381#[derive(Debug, Clone, Serialize, Deserialize)]
391#[serde(untagged)]
392pub enum RealtimeToolChoiceConfig {
393 Options(ToolChoiceOptions),
394 Reference(ToolReference),
395}
396
397#[derive(Debug, Clone, Serialize, Deserialize)]
399#[serde(rename_all = "snake_case")]
400pub enum ToolChoiceOptions {
401 None,
402 Auto,
403 Required,
404}
405
406#[derive(Debug, Clone, Serialize, Deserialize)]
417#[serde(untagged)]
418pub enum MaxOutputTokens {
419 Integer(u32),
421 Inf(InfMarker),
422}
423
424impl Default for MaxOutputTokens {
425 fn default() -> Self {
426 Self::Inf(InfMarker::Inf)
427 }
428}
429
430#[derive(Debug, Clone, Serialize, Deserialize)]
432pub enum InfMarker {
433 #[serde(rename = "inf")]
434 Inf,
435}
436
437#[serde_with::skip_serializing_none]
442#[derive(Debug, Clone, Serialize, Deserialize)]
443pub struct TruncationTokenLimits {
444 pub post_instructions: Option<u32>,
445}
446
447#[derive(Debug, Clone, Serialize, Deserialize)]
449pub enum RetentionRatioTruncationType {
450 #[serde(rename = "retention_ratio")]
451 RetentionRatio,
452}
453
454#[serde_with::skip_serializing_none]
455#[derive(Debug, Clone, Serialize, Deserialize)]
456pub struct RetentionRatioTruncation {
457 pub retention_ratio: f64,
458 #[serde(rename = "type")]
459 pub r#type: RetentionRatioTruncationType,
460 pub token_limits: Option<TruncationTokenLimits>,
461}
462
463#[derive(Debug, Clone, Serialize, Deserialize, Default)]
465#[serde(rename_all = "snake_case")]
466pub enum TruncationMode {
467 #[default]
468 Auto,
469 Disabled,
470}
471
472#[derive(Debug, Clone, Serialize, Deserialize)]
474#[serde(untagged)]
475pub enum RealtimeTruncation {
476 Mode(TruncationMode),
477 RetentionRatio(RetentionRatioTruncation),
478}
479
480#[serde_with::skip_serializing_none]
485#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
486#[validate(schema(function = "validate_client_secret_create_request"))]
487pub struct RealtimeClientSecretCreateRequest {
488 pub session: RealtimeSessionCreateRequest,
489}
490
491impl Normalizable for RealtimeClientSecretCreateRequest {}
492
493fn validate_client_secret_create_request(
494 req: &RealtimeClientSecretCreateRequest,
495) -> Result<(), ValidationError> {
496 let has_model = req
497 .session
498 .model
499 .as_deref()
500 .is_some_and(|m| !m.trim().is_empty());
501 if !has_model {
502 return Err(ValidationError::new("session.model is required"));
503 }
504 Ok(())
505}
506
507#[derive(Debug, Clone, Serialize, Deserialize)]
508pub struct RealtimeSessionClientSecret {
509 pub expires_at: i64,
510 pub value: Redacted,
511}
512
513#[serde_with::skip_serializing_none]
518#[derive(Debug, Clone, Serialize, Deserialize)]
519pub struct RealtimeAudioConfigInput {
520 pub format: Option<RealtimeAudioFormats>,
521 pub noise_reduction: Option<NoiseReduction>,
522 pub transcription: Option<AudioTranscription>,
523 pub turn_detection: Option<TurnDetection>,
524}
525
526#[serde_with::skip_serializing_none]
527#[derive(Debug, Clone, Serialize, Deserialize)]
528pub struct RealtimeAudioConfigOutput {
529 pub format: Option<RealtimeAudioFormats>,
530 pub speed: Option<f64>,
531 pub voice: Option<Voice>,
532}
533
534#[serde_with::skip_serializing_none]
535#[derive(Debug, Clone, Serialize, Deserialize)]
536pub struct RealtimeAudioConfig {
537 pub input: Option<RealtimeAudioConfigInput>,
538 pub output: Option<RealtimeAudioConfigOutput>,
539}
540
541#[serde_with::skip_serializing_none]
542#[derive(Debug, Clone, Serialize, Deserialize)]
543pub struct RealtimeTranscriptionSessionAudio {
544 pub input: Option<RealtimeAudioConfigInput>,
545}
546
547#[serde_with::skip_serializing_none]
548#[derive(Debug, Clone, Serialize, Deserialize)]
549pub struct RealtimeTranscriptionSessionResponseAudio {
550 pub input: Option<RealtimeTranscriptionSessionResponseAudioConfigInput>,
551}
552
553#[serde_with::skip_serializing_none]
554#[derive(Debug, Clone, Serialize, Deserialize)]
555pub struct RealtimeTranscriptionSessionResponseAudioConfigInput {
556 pub format: Option<RealtimeAudioFormats>,
557 pub noise_reduction: Option<NoiseReduction>,
558 pub transcription: Option<AudioTranscription>,
559 pub turn_detection: Option<RealtimeTranscriptionSessionTurnDetection>,
560}
561
562#[derive(Debug, Clone, Serialize, Deserialize)]
567pub enum RealtimeIncludeOption {
568 #[serde(rename = "item.input_audio_transcription.logprobs")]
569 InputAudioTranscriptionLogprobs,
570}
571
572#[derive(Debug, Clone, Serialize, Deserialize)]
578pub enum RealtimeSessionType {
579 #[serde(rename = "realtime")]
580 Realtime,
581}
582
583#[derive(Debug, Clone, Serialize, Deserialize)]
589pub enum RealtimeTranscriptionSessionType {
590 #[serde(rename = "transcription")]
591 Transcription,
592}