use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use validator::{Validate, ValidationError};
use crate::{
common::{Redacted, ResponsePrompt, ToolReference},
validated::Normalizable,
};
/// Request body for creating a realtime session.
///
/// Every `Option` field that is `None` is left out of the serialized output
/// (`skip_serializing_none`). Validation runs the schema-level function
/// `validate_session_create_request`, which requires a non-blank `model`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[validate(schema(function = "validate_session_create_request"))]
pub struct RealtimeSessionCreateRequest {
    /// Session kind discriminator; serialized under the key `type`.
    #[serde(rename = "type")]
    pub r#type: RealtimeSessionType,
    pub audio: Option<RealtimeAudioConfig>,
    pub include: Option<Vec<RealtimeIncludeOption>>,
    pub instructions: Option<String>,
    pub max_output_tokens: Option<MaxOutputTokens>,
    /// Optional at the type level, but rejected by validation when missing or blank.
    pub model: Option<String>,
    /// When the key is absent from the input, `audio()` supplies
    /// `Some(vec![OutputModality::Audio])`.
    #[serde(default = "audio")]
    pub output_modalities: Option<Vec<OutputModality>>,
    pub prompt: Option<ResponsePrompt>,
    pub tool_choice: Option<RealtimeToolChoiceConfig>,
    /// List of tools made available to the session.
    ///
    /// This is a list (same shape as `RealtimeSessionCreateResponse::tools`):
    /// a single internally tagged `RealtimeToolsConfig` cannot be deserialized
    /// from a JSON array of tool objects.
    pub tools: Option<Vec<RealtimeToolsConfig>>,
    pub tracing: Option<RealtimeTracingConfig>,
    pub truncation: Option<RealtimeTruncation>,
}
// Empty impl: nothing is overridden, so any behavior comes from the
// `Normalizable` trait's own defaults.
impl Normalizable for RealtimeSessionCreateRequest {}
/// Schema-level validator for [`RealtimeSessionCreateRequest`].
///
/// # Errors
///
/// Returns a `ValidationError` with code `"model is required"` when `model`
/// is `None`, empty, or consists only of whitespace.
fn validate_session_create_request(
    req: &RealtimeSessionCreateRequest,
) -> Result<(), ValidationError> {
    req.model
        .as_deref()
        // Keep the value only if something remains after trimming.
        .filter(|name| !name.trim().is_empty())
        .map(|_| ())
        .ok_or_else(|| ValidationError::new("model is required"))
}
/// Response body returned when a realtime session is created.
///
/// Carries a required `client_secret` alongside the session settings.
/// `None` fields are omitted when serializing (`skip_serializing_none`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeSessionCreateResponse {
/// Secret and its expiry; the only non-optional field besides `type`.
pub client_secret: RealtimeSessionClientSecret,
/// Session kind discriminator; serialized under the key `type`.
#[serde(rename = "type")]
pub r#type: RealtimeSessionType,
pub audio: Option<RealtimeAudioConfig>,
pub include: Option<Vec<RealtimeIncludeOption>>,
pub instructions: Option<String>,
pub max_output_tokens: Option<MaxOutputTokens>,
pub model: Option<String>,
/// When the key is absent from the input, `audio()` supplies
/// `Some(vec![OutputModality::Audio])`.
#[serde(default = "audio")]
pub output_modalities: Option<Vec<OutputModality>>,
pub prompt: Option<ResponsePrompt>,
pub tool_choice: Option<RealtimeToolChoiceConfig>,
/// List of tool definitions.
pub tools: Option<Vec<RealtimeToolsConfig>>,
pub tracing: Option<RealtimeTracingConfig>,
pub truncation: Option<RealtimeTruncation>,
}
/// Request body for creating a realtime transcription session.
///
/// `None` fields are omitted when serializing (`skip_serializing_none`).
/// Validation runs `validate_transcription_session_create_request`, which
/// requires a non-blank `model`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[validate(schema(function = "validate_transcription_session_create_request"))]
pub struct RealtimeTranscriptionSessionCreateRequest {
/// Session kind discriminator; serialized under the key `type`.
#[serde(rename = "type")]
pub r#type: RealtimeTranscriptionSessionType,
pub audio: Option<RealtimeTranscriptionSessionAudio>,
pub include: Option<Vec<RealtimeIncludeOption>>,
/// Optional at the type level, but rejected by validation when missing or blank.
pub model: Option<String>,
pub language: Option<String>,
pub prompt: Option<String>,
}
// Empty impl: nothing is overridden, so any behavior comes from the
// `Normalizable` trait's own defaults.
impl Normalizable for RealtimeTranscriptionSessionCreateRequest {
}
/// Schema-level validator for [`RealtimeTranscriptionSessionCreateRequest`].
///
/// # Errors
///
/// Returns a `ValidationError` with code `"model is required"` when `model`
/// is `None`, empty, or consists only of whitespace.
fn validate_transcription_session_create_request(
    req: &RealtimeTranscriptionSessionCreateRequest,
) -> Result<(), ValidationError> {
    // A missing model is treated exactly like an empty one.
    let model = req.model.as_deref().unwrap_or_default();
    if model.trim().is_empty() {
        return Err(ValidationError::new("model is required"));
    }
    Ok(())
}
/// Response body returned when a realtime transcription session is created.
///
/// `id`, `object` and `type` are required; the remaining fields are optional
/// and omitted from serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeTranscriptionSessionCreateResponse {
pub id: String,
pub object: String,
/// Session kind discriminator; serialized under the key `type`.
#[serde(rename = "type")]
pub r#type: RealtimeTranscriptionSessionType,
pub audio: Option<RealtimeTranscriptionSessionResponseAudio>,
// NOTE(review): presumably a Unix timestamp — confirm units against the API.
pub expires_at: Option<i64>,
pub include: Option<Vec<RealtimeIncludeOption>>,
}
/// Audio format descriptor, internally tagged by a `type` key.
///
/// The tag value is `"audio/pcm"`, `"audio/pcmu"` or `"audio/pcma"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RealtimeAudioFormats {
/// `"audio/pcm"`; `rate` is left out of the output when `None`.
#[serde(rename = "audio/pcm")]
Pcm {
#[serde(skip_serializing_if = "Option::is_none")]
rate: Option<u32>,
},
/// `"audio/pcmu"`; carries no extra fields.
#[serde(rename = "audio/pcmu")]
Pcmu,
/// `"audio/pcma"`; carries no extra fields.
#[serde(rename = "audio/pcma")]
Pcma,
}
/// Transcription settings; all fields are optional and omitted from
/// serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AudioTranscription {
pub language: Option<String>,
pub model: Option<String>,
pub prompt: Option<String>,
}
/// Noise-reduction kind; serialized in snake_case as `"near_field"` or
/// `"far_field"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NoiseReductionType {
NearField,
FarField,
}
/// Noise-reduction setting: an object with a single required `type` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NoiseReduction {
/// Serialized under the key `type`.
#[serde(rename = "type")]
pub r#type: NoiseReductionType,
}
/// Eagerness level used by `TurnDetection::SemanticVad`.
///
/// Serialized in snake_case (`"low"`, `"medium"`, `"high"`, `"auto"`);
/// `Default::default()` yields `Auto`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum SemanticVadEagerness {
Low,
Medium,
High,
#[default]
Auto,
}
/// Turn-detection settings, internally tagged by a `type` key
/// (`"server_vad"` or `"semantic_vad"`).
///
/// All variant fields are optional and omitted from serialized output when
/// `None` (`skip_serializing_none`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum TurnDetection {
/// Tag value `"server_vad"`.
#[serde(rename = "server_vad")]
ServerVad {
create_response: Option<bool>,
idle_timeout_ms: Option<u32>,
interrupt_response: Option<bool>,
prefix_padding_ms: Option<u32>,
silence_duration_ms: Option<u32>,
threshold: Option<f64>,
},
/// Tag value `"semantic_vad"`.
#[serde(rename = "semantic_vad")]
SemanticVad {
create_response: Option<bool>,
eagerness: Option<SemanticVadEagerness>,
interrupt_response: Option<bool>,
},
}
/// Turn-detection settings used in transcription session responses.
///
/// Internally tagged by a `type` key; `"server_vad"` is the only variant.
/// Variant fields are omitted from serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RealtimeTranscriptionSessionTurnDetection {
/// Tag value `"server_vad"`.
#[serde(rename = "server_vad")]
ServerVad {
prefix_padding_ms: Option<u32>,
silence_duration_ms: Option<u32>,
threshold: Option<f64>,
},
}
/// Voice selection, untagged: either a bare string or an object with an `id`.
///
/// Being untagged, deserialization tries the variants in declaration order.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Voice {
/// A plain string value.
VoiceIDsShared(String),
/// An object of the form `{ "id": ... }`.
Custom { id: String },
}
/// Output modality; serialized in snake_case as `"text"` or `"audio"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OutputModality {
Text,
Audio,
}
/// Serde default for the `output_modalities` fields: a one-element list
/// containing [`OutputModality::Audio`], used when the key is absent.
#[expect(
    clippy::unnecessary_wraps,
    reason = "must return Option to match serde default field type"
)]
fn audio() -> Option<Vec<OutputModality>> {
    let modalities = Vec::from([OutputModality::Audio]);
    Some(modalities)
}
/// Object form of the tracing setting; all fields are optional and omitted
/// from serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TracingConfig {
pub group_id: Option<String>,
/// Arbitrary JSON value.
pub metadata: Option<Value>,
pub workflow_name: Option<String>,
}
/// String form of the tracing setting; the only accepted value is `"auto"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TracingMode {
#[serde(rename = "auto")]
Auto,
}
/// Tracing setting, untagged: either a `TracingMode` string or a
/// `TracingConfig` object.
///
/// Being untagged, deserialization tries `Mode` first, then `Config`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum RealtimeTracingConfig {
Mode(TracingMode),
Config(TracingConfig),
}
/// Connector identifier used by `RealtimeToolsConfig::McpTool`.
///
/// Variants are serialized in snake_case, e.g. `ConnectorDropbox` becomes
/// `"connector_dropbox"` and `ConnectorGooglecalendar` becomes
/// `"connector_googlecalendar"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[expect(
clippy::enum_variant_names,
reason = "variant names match OpenAI Realtime API spec"
)]
pub enum ConnectorId {
ConnectorDropbox,
ConnectorGmail,
ConnectorGooglecalendar,
ConnectorGoogledrive,
ConnectorMicrosoftteams,
ConnectorOutlookcalendar,
ConnectorOutlookemail,
ConnectorSharepoint,
}
/// A tool definition, internally tagged by a `type` key
/// (`"function"` or `"mcp"`).
///
/// Optional variant fields are omitted from serialized output when `None`
/// (`skip_serializing_none`).
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RealtimeToolsConfig {
/// Tag value `"function"`; every field is optional.
#[serde(rename = "function")]
RealtimeFunctionTool {
description: Option<String>,
name: Option<String>,
/// Arbitrary JSON value.
parameters: Option<Value>,
},
/// Tag value `"mcp"`; `server_label` is the only required field.
#[serde(rename = "mcp")]
McpTool {
server_label: String,
allowed_tools: Option<McpAllowedTools>,
/// Wrapped in `Redacted`.
authorization: Option<Redacted>,
connector_id: Option<ConnectorId>,
/// Header values are wrapped in `Redacted`.
headers: Option<HashMap<String, Redacted>>,
require_approval: Option<McpToolApproval>,
server_description: Option<String>,
server_url: Option<String>,
},
}
/// Allowed-tools setting, untagged: either a list of strings or a
/// `McpToolFilter` object.
///
/// Being untagged, deserialization tries `List` first, then `Filter`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum McpAllowedTools {
List(Vec<String>),
Filter(McpToolFilter),
}
/// Tool filter object; both fields are optional and omitted from serialized
/// output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct McpToolFilter {
pub read_only: Option<bool>,
pub tool_names: Option<Vec<String>>,
}
/// Approval setting, untagged: either a `McpToolApprovalSetting` string or
/// a `McpToolApprovalFilter` object.
///
/// Being untagged, deserialization tries `Setting` first, then `Filter`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum McpToolApproval {
Setting(McpToolApprovalSetting),
Filter(McpToolApprovalFilter),
}
/// String form of the approval setting; serialized in snake_case as
/// `"always"` or `"never"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum McpToolApprovalSetting {
Always,
Never,
}
/// Object form of the approval setting: optional `always` and `never`
/// filters, each omitted from serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct McpToolApprovalFilter {
pub always: Option<McpToolFilter>,
pub never: Option<McpToolFilter>,
}
/// Tool-choice setting, untagged: either a `ToolChoiceOptions` string or a
/// `ToolReference` value.
///
/// Being untagged, deserialization tries `Options` first, then `Reference`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum RealtimeToolChoiceConfig {
Options(ToolChoiceOptions),
Reference(ToolReference),
}
/// String form of the tool-choice setting; serialized in snake_case as
/// `"none"`, `"auto"` or `"required"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolChoiceOptions {
None,
Auto,
Required,
}
/// Output token cap, untagged: either an unsigned integer or the `"inf"`
/// marker string.
///
/// Being untagged, deserialization tries `Integer` first, then `Inf`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum MaxOutputTokens {
Integer(u32),
Inf(InfMarker),
}
impl Default for MaxOutputTokens {
fn default() -> Self {
Self::Inf(InfMarker::Inf)
}
}
/// Single-value marker that serializes as the string `"inf"`; used by
/// `MaxOutputTokens::Inf`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum InfMarker {
#[serde(rename = "inf")]
Inf,
}
/// Token limits attached to `RetentionRatioTruncation`; the single field is
/// omitted from serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TruncationTokenLimits {
pub post_instructions: Option<u32>,
}
/// Single-value discriminator that serializes as `"retention_ratio"`; used
/// as the `type` of `RetentionRatioTruncation`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RetentionRatioTruncationType {
#[serde(rename = "retention_ratio")]
RetentionRatio,
}
/// Object form of the truncation setting.
///
/// `retention_ratio` and `type` are required; `token_limits` is omitted from
/// serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetentionRatioTruncation {
// NOTE(review): no range check is applied here — confirm the valid range
// against the API if one is expected.
pub retention_ratio: f64,
/// Serialized under the key `type`.
#[serde(rename = "type")]
pub r#type: RetentionRatioTruncationType,
pub token_limits: Option<TruncationTokenLimits>,
}
/// String form of the truncation setting; serialized in snake_case as
/// `"auto"` or `"disabled"`. `Default::default()` yields `Auto`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum TruncationMode {
#[default]
Auto,
Disabled,
}
/// Truncation setting, untagged: either a `TruncationMode` string or a
/// `RetentionRatioTruncation` object.
///
/// Being untagged, deserialization tries `Mode` first, then `RetentionRatio`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum RealtimeTruncation {
Mode(TruncationMode),
RetentionRatio(RetentionRatioTruncation),
}
/// Request body for creating a client secret; wraps a required `session`.
///
/// Validation runs `validate_client_secret_create_request`, which requires
/// a non-blank `session.model`. The `session` field carries no
/// `#[validate(nested)]` attribute; that schema function is the only check
/// declared on this struct.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[validate(schema(function = "validate_client_secret_create_request"))]
pub struct RealtimeClientSecretCreateRequest {
pub session: RealtimeSessionCreateRequest,
}
// Empty impl: nothing is overridden, so any behavior comes from the
// `Normalizable` trait's own defaults.
impl Normalizable for RealtimeClientSecretCreateRequest {}
/// Schema-level validator for [`RealtimeClientSecretCreateRequest`].
///
/// # Errors
///
/// Returns a `ValidationError` with code `"session.model is required"` when
/// `session.model` is `None`, empty, or consists only of whitespace.
fn validate_client_secret_create_request(
    req: &RealtimeClientSecretCreateRequest,
) -> Result<(), ValidationError> {
    match req.session.model.as_deref().map(str::trim) {
        Some(trimmed) if !trimmed.is_empty() => Ok(()),
        _ => Err(ValidationError::new("session.model is required")),
    }
}
/// Client secret returned in `RealtimeSessionCreateResponse`; both fields
/// are required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeSessionClientSecret {
// NOTE(review): presumably a Unix timestamp — confirm units against the API.
pub expires_at: i64,
/// The secret itself, wrapped in `Redacted`.
pub value: Redacted,
}
/// Input-side audio settings; all fields are optional and omitted from
/// serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeAudioConfigInput {
pub format: Option<RealtimeAudioFormats>,
pub noise_reduction: Option<NoiseReduction>,
pub transcription: Option<AudioTranscription>,
pub turn_detection: Option<TurnDetection>,
}
/// Output-side audio settings; all fields are optional and omitted from
/// serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeAudioConfigOutput {
pub format: Option<RealtimeAudioFormats>,
pub speed: Option<f64>,
pub voice: Option<Voice>,
}
/// Audio settings for a realtime session, split into optional `input` and
/// `output` halves; each is omitted from serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeAudioConfig {
pub input: Option<RealtimeAudioConfigInput>,
pub output: Option<RealtimeAudioConfigOutput>,
}
/// Audio settings for a transcription session request; only an optional
/// `input` half, omitted from serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeTranscriptionSessionAudio {
pub input: Option<RealtimeAudioConfigInput>,
}
/// Audio settings in a transcription session response; only an optional
/// `input` half, omitted from serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeTranscriptionSessionResponseAudio {
pub input: Option<RealtimeTranscriptionSessionResponseAudioConfigInput>,
}
/// Input-side audio settings in a transcription session response.
///
/// Same fields as `RealtimeAudioConfigInput`, except `turn_detection` uses
/// `RealtimeTranscriptionSessionTurnDetection`. All fields are optional and
/// omitted from serialized output when `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeTranscriptionSessionResponseAudioConfigInput {
pub format: Option<RealtimeAudioFormats>,
pub noise_reduction: Option<NoiseReduction>,
pub transcription: Option<AudioTranscription>,
pub turn_detection: Option<RealtimeTranscriptionSessionTurnDetection>,
}
/// Entries accepted in the `include` lists; the only value serializes as
/// `"item.input_audio_transcription.logprobs"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RealtimeIncludeOption {
#[serde(rename = "item.input_audio_transcription.logprobs")]
InputAudioTranscriptionLogprobs,
}
/// Single-value `type` discriminator for realtime sessions; serializes as
/// `"realtime"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RealtimeSessionType {
#[serde(rename = "realtime")]
Realtime,
}
/// Single-value `type` discriminator for transcription sessions; serializes
/// as `"transcription"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RealtimeTranscriptionSessionType {
#[serde(rename = "transcription")]
Transcription,
}