use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use validator::{Validate, ValidationError};
use crate::{
builders::RealtimeResponseBuilder,
common::ResponsePrompt,
realtime_conversation::{ConversationItemRole, RealtimeContentPart, RealtimeConversationItem},
realtime_session::{
MaxOutputTokens, OutputModality, RealtimeAudioFormats, RealtimeToolChoiceConfig,
RealtimeToolsConfig, Voice,
},
};
/// A realtime response object.
///
/// Every field is an `Option`; `skip_serializing_none` omits fields that are
/// `None` from the serialized output instead of writing them as `null`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponse {
/// Identifier of the response.
pub id: Option<String>,
/// Audio output configuration associated with the response.
pub audio: Option<RealtimeResponseCreateAudioOutput>,
/// Identifier of a conversation.
// NOTE(review): presumably the conversation this response belongs to — confirm against the API spec.
pub conversation_id: Option<String>,
/// Output-token limit setting (see `MaxOutputTokens`).
pub max_output_tokens: Option<MaxOutputTokens>,
/// Free-form string key/value metadata.
pub metadata: Option<HashMap<String, String>>,
/// Object-type tag; the only defined value serializes as `"realtime.response"`.
pub object: Option<RealtimeResponseObject>,
/// Conversation items carried as the response output.
pub output: Option<Vec<RealtimeConversationItem>>,
/// Output modalities listed for the response.
pub output_modalities: Option<Vec<OutputModality>>,
/// Coarse status of the response.
pub status: Option<ResponseStatus>,
/// Additional detail accompanying `status` (type, reason, error).
pub status_details: Option<RealtimeResponseStatus>,
/// Token usage counters for the response.
pub usage: Option<RealtimeResponseUsage>,
}
/// Parameters for creating a realtime response.
///
/// All fields are optional and `None` fields are omitted when serializing.
/// The type also derives `Validate`: its struct-level schema check is
/// `validate_response_create_params`, which validates every item in `input`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[validate(schema(function = "validate_response_create_params"))]
pub struct RealtimeResponseCreateParams {
/// Audio output configuration.
pub audio: Option<RealtimeResponseCreateAudioOutput>,
/// Either a conversation mode (`"auto"` / `"none"`) or an arbitrary string
/// (see `ResponseConversation`).
pub conversation: Option<ResponseConversation>,
/// Conversation items supplied as input; each one is checked by the schema
/// validator.
pub input: Option<Vec<RealtimeConversationItem>>,
/// Instruction text.
pub instructions: Option<String>,
/// Output-token limit setting (see `MaxOutputTokens`).
pub max_output_tokens: Option<MaxOutputTokens>,
/// Free-form string key/value metadata.
pub metadata: Option<HashMap<String, String>>,
/// Requested output modalities.
pub output_modalities: Option<Vec<OutputModality>>,
/// Prompt reference (see `ResponsePrompt`).
pub prompt: Option<ResponsePrompt>,
/// Tool-choice configuration.
pub tool_choice: Option<RealtimeToolChoiceConfig>,
/// Tool configurations.
pub tools: Option<Vec<RealtimeToolsConfig>>,
}
/// Object-type tag used by the `object` field of `RealtimeResponse`.
///
/// Single-variant enum, so any string other than `"realtime.response"` fails
/// to deserialize.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum RealtimeResponseObject {
/// Serialized as `"realtime.response"`.
#[serde(rename = "realtime.response")]
RealtimeResponse,
}
/// Status values for the `status` field of `RealtimeResponse`.
///
/// Variants are (de)serialized in snake_case, e.g. `InProgress` is
/// `"in_progress"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ResponseStatus {
Completed,
Cancelled,
Failed,
Incomplete,
InProgress,
}
/// Values for the `type` field of `RealtimeResponseStatus`.
///
/// Has the same variants as `ResponseStatus` except `InProgress`; serialized
/// in snake_case.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum StatusDetailsType {
Completed,
Cancelled,
Failed,
Incomplete,
}
/// Values for the `reason` field of `RealtimeResponseStatus`.
///
/// Serialized in snake_case, e.g. `TurnDetected` is `"turn_detected"`.
// NOTE(review): which reasons pair with which `StatusDetailsType` is not
// enforced here — confirm against the API spec if that matters to callers.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum StatusDetailsReason {
TurnDetected,
ClientCancelled,
MaxOutputTokens,
ContentFilter,
}
/// Error information nested under `RealtimeResponseStatus::error`.
///
/// Both fields are optional strings; `None` fields are omitted when
/// serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResponseStatusError {
/// Error code string.
pub code: Option<String>,
/// Error type string; appears as `"type"` on the wire (raw identifier
/// because `type` is a Rust keyword).
#[serde(rename = "type")]
pub r#type: Option<String>,
}
/// Detail record stored in `RealtimeResponse::status_details`.
///
/// All fields are optional; `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseStatus {
/// Error description, if any.
pub error: Option<ResponseStatusError>,
/// Reason value (see `StatusDetailsReason`).
pub reason: Option<StatusDetailsReason>,
/// Status kind; appears as `"type"` on the wire (raw identifier because
/// `type` is a Rust keyword).
#[serde(rename = "type")]
pub r#type: Option<StatusDetailsType>,
}
/// Audio output settings nested under
/// `RealtimeResponseCreateAudioOutput::output`.
///
/// Both fields are optional; `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResponseAudioOutputConfig {
/// Audio format selection (see `RealtimeAudioFormats`).
pub format: Option<RealtimeAudioFormats>,
/// Voice selection (see `Voice`).
pub voice: Option<Voice>,
}
/// Wrapper used by the `audio` field of both `RealtimeResponse` and
/// `RealtimeResponseCreateParams`.
///
/// It holds a single optional `output` member, so the serialized form is an
/// object with at most one key, `"output"`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseCreateAudioOutput {
/// Output-side audio settings.
pub output: Option<ResponseAudioOutputConfig>,
}
/// Per-category token counts nested under
/// `RealtimeResponseUsageInputTokenDetails::cached_tokens_details`.
///
/// All counters are optional `u64` values; `None` fields are omitted when
/// serializing.
// NOTE(review): presumably a breakdown of `cached_tokens` by modality —
// confirm against the API spec.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedTokensDetails {
/// Audio token count.
pub audio_tokens: Option<u64>,
/// Image token count.
pub image_tokens: Option<u64>,
/// Text token count.
pub text_tokens: Option<u64>,
}
/// Input-token detail record nested under
/// `RealtimeResponseUsage::input_token_details`.
///
/// All fields are optional; `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseUsageInputTokenDetails {
/// Audio token count.
pub audio_tokens: Option<u64>,
/// Cached token count.
pub cached_tokens: Option<u64>,
/// Per-category detail for cached tokens.
pub cached_tokens_details: Option<CachedTokensDetails>,
/// Image token count.
pub image_tokens: Option<u64>,
/// Text token count.
pub text_tokens: Option<u64>,
}
/// Output-token detail record nested under
/// `RealtimeResponseUsage::output_token_details`.
///
/// Both counters are optional; `None` fields are omitted when serializing.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseUsageOutputTokenDetails {
/// Audio token count.
pub audio_tokens: Option<u64>,
/// Text token count.
pub text_tokens: Option<u64>,
}
/// Token usage record stored in `RealtimeResponse::usage`.
///
/// All fields are optional; `None` fields are omitted when serializing.
// NOTE(review): nothing here enforces a relationship between
// `total_tokens` and the input/output counts — the values are carried as-is.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeResponseUsage {
/// Detail record for input tokens.
pub input_token_details: Option<RealtimeResponseUsageInputTokenDetails>,
/// Input token count.
pub input_tokens: Option<u64>,
/// Detail record for output tokens.
pub output_token_details: Option<RealtimeResponseUsageOutputTokenDetails>,
/// Output token count.
pub output_tokens: Option<u64>,
/// Total token count.
pub total_tokens: Option<u64>,
}
/// Value of `RealtimeResponseCreateParams::conversation`: either a mode
/// keyword or a free-form string.
///
/// The enum is `untagged`, so it is written as the bare inner value. On
/// deserialization serde tries the variants in declaration order, which is
/// why `Mode` must stay ahead of `Id`: the strings `"auto"` and `"none"`
/// become `Mode`, and every other string falls through to `Id`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ResponseConversation {
/// One of the keywords defined by `ResponseConversationMode`.
Mode(ResponseConversationMode),
/// Any other string.
// NOTE(review): presumably a conversation identifier, going by the variant name — confirm.
Id(String),
}
/// Keyword form of `ResponseConversation`.
///
/// Serialized in snake_case as `"auto"` / `"none"`. `Default` yields `Auto`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ResponseConversationMode {
/// Serialized as `"auto"`; the default variant.
#[default]
Auto,
/// Serialized as the string `"none"` (an enum variant, unrelated to
/// `Option::None`).
None,
}
fn validate_response_create_params(
request: &RealtimeResponseCreateParams,
) -> Result<(), ValidationError> {
if let Some(items) = &request.input {
for item in items {
validate_conversation_item(item)?;
}
}
Ok(())
}
fn validate_conversation_item(item: &RealtimeConversationItem) -> Result<(), ValidationError> {
let (role, content) = match item {
RealtimeConversationItem::Message { role, content, .. } => (role, content),
_ => return Ok(()),
};
for (i, part) in content.iter().enumerate() {
let allowed = match role {
ConversationItemRole::System => {
matches!(part, RealtimeContentPart::InputText { .. })
}
ConversationItemRole::User => matches!(
part,
RealtimeContentPart::InputText { .. }
| RealtimeContentPart::InputAudio { .. }
| RealtimeContentPart::InputImage { .. }
),
ConversationItemRole::Assistant => matches!(
part,
RealtimeContentPart::OutputText { .. } | RealtimeContentPart::OutputAudio { .. }
),
};
if !allowed {
let mut err = ValidationError::new("invalid_content_part");
err.message = Some(
format!(
"content[{}]: {:?} role does not allow \"{}\" content parts",
i,
role,
part.type_name()
)
.into(),
);
return Err(err);
}
}
Ok(())
}
impl RealtimeResponse {
/// Returns a `RealtimeResponseBuilder` created from `id`.
///
/// This is a convenience entry point that forwards `id` unchanged to
/// `RealtimeResponseBuilder::new`; anything convertible into a `String` is
/// accepted.
pub fn builder(id: impl Into<String>) -> RealtimeResponseBuilder {
RealtimeResponseBuilder::new(id)
}
}