/*
* OpenAI API
*
* The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details.
*
* The version of the OpenAPI document: 2.3.0
*
* Generated by: https://openapi-generator.tech
*/
use crate::models;
use serde::{Deserialize, Serialize};
/// Client event that appends audio bytes to the translation session input audio buffer.
///
/// WebSocket translation sessions accept base64-encoded 24 kHz PCM16 mono little-endian raw
/// audio bytes. Unsupported websocket audio formats return a validation error because
/// lower-quality audio materially degrades translation quality.
///
/// Translation consumes 200 ms engine frames. For best realtime behavior, append audio in
/// 200 ms chunks:
///
/// - If a chunk is shorter, the server buffers it until it has enough audio for one frame.
/// - If a chunk is longer, the server splits it into 200 ms frames and enqueues them
///   back-to-back.
///
/// Keep appending silence while the session is active. If a client stops sending audio and
/// later resumes, model time treats the resumed audio as contiguous with the previous audio
/// rather than as a real-world pause.
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize, bon::Builder)]
pub struct RealtimeTranslationClientEventInputAudioBufferAppend {
/// Optional client-generated ID used to identify this event.
///
/// Left out of the serialized output entirely when it is `None`.
#[serde(rename = "event_id", skip_serializing_if = "Option::is_none")]
pub event_id: Option<String>,
/// The event type, must be `session.input_audio_buffer.append`.
///
/// Serialized under the key `type`.
#[serde(rename = "type")]
pub r#type: Type,
/// Base64-encoded 24 kHz PCM16 mono audio bytes.
///
/// Stored as a plain `String`; this struct does not itself validate the encoding.
#[serde(rename = "audio")]
pub audio: String,
}
impl RealtimeTranslationClientEventInputAudioBufferAppend {
/// Send this event to append audio bytes to the translation session input audio buffer. WebSocket translation sessions accept base64-encoded 24 kHz PCM16 mono little-endian raw audio bytes. Unsupported websocket audio formats return a validation error because lower-quality audio materially degrades translation quality. Translation consumes 200 ms engine frames. For best realtime behavior, append audio in 200 ms chunks. If a chunk is shorter, the server buffers it until it has enough audio for one frame. If a chunk is longer, the server splits it into 200 ms frames and enqueues them back-to-back. Keep appending silence while the session is active. If a client stops sending audio and later resumes, model time treats the resumed audio as contiguous with the previous audio rather than as a real-world pause.
pub fn new(
r#type: Type,
audio: String,
) -> RealtimeTranslationClientEventInputAudioBufferAppend {
RealtimeTranslationClientEventInputAudioBufferAppend {
event_id: None,
r#type,
audio,
}
}
}
/// The event type, must be `session.input_audio_buffer.append`.
///
/// Modeled as a single-variant enum, so this is the only value the type can hold.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
pub enum Type {
/// Serialized as the string `session.input_audio_buffer.append`.
#[serde(rename = "session.input_audio_buffer.append")]
SessionInputAudioBufferAppend,
}
impl Default for Type {
fn default() -> Type {
Self::SessionInputAudioBufferAppend
}
}
impl std::fmt::Display for RealtimeTranslationClientEventInputAudioBufferAppend {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match serde_json::to_string(self) {
Ok(s) => write!(f, "{}", s),
Err(_) => Err(std::fmt::Error),
}
}
}