use crate::audio::AudioEncoding;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::ops::{Deref, DerefMut};
/// Selects how interruptions are detected for a realtime session.
///
/// Variants are (de)serialized as snake_case strings (`"manual"` and
/// `"automatic"`). [`InterruptionDetection::Manual`] is the default.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InterruptionDetection {
    /// Default variant.
    // NOTE(review): presumably the client signals interruptions itself — confirm.
    #[default]
    Manual,
    /// Non-default variant.
    // NOTE(review): presumably interruptions are detected without client action — confirm.
    Automatic,
}
/// Voice-activity-detection mode carried in [`VadConfig`].
///
/// Variants are (de)serialized as snake_case strings (`"server_vad"`,
/// `"semantic_vad"`, `"none"`). [`VadMode::ServerVad`] is the default.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VadMode {
    /// Default mode.
    #[default]
    ServerVad,
    /// Semantic VAD mode.
    SemanticVad,
    /// VAD disabled; this is what [`VadConfig::disabled`] selects.
    None,
}
/// Voice-activity-detection settings.
///
/// `mode` is always serialized (under the key `"type"`); every other field is
/// optional and left out of the serialized output when it is `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct VadConfig {
    /// Detection mode, written to / read from the `"type"` key.
    #[serde(rename = "type")]
    pub mode: VadMode,

    /// Silence duration in milliseconds.
    // NOTE(review): presumably the amount of silence that ends a turn — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub silence_duration_ms: Option<u32>,

    /// Detection threshold.
    // NOTE(review): valid range is not visible in this file — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub threshold: Option<f32>,

    /// Prefix padding in milliseconds.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prefix_padding_ms: Option<u32>,

    /// Whether detected speech should interrupt the current response.
    // NOTE(review): semantics inferred from the field name — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub interrupt_response: Option<bool>,

    /// Eagerness setting, kept as a free-form string.
    // NOTE(review): accepted values are not visible in this file — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub eagerness: Option<String>,
}
impl Default for VadConfig {
fn default() -> Self {
Self {
mode: VadMode::ServerVad,
silence_duration_ms: Some(500),
threshold: None,
prefix_padding_ms: None,
interrupt_response: Some(true),
eagerness: None,
}
}
}
impl VadConfig {
    /// Returns the server-VAD configuration, which is exactly
    /// [`VadConfig::default`].
    pub fn server_vad() -> Self {
        Default::default()
    }

    /// Returns a configuration in [`VadMode::SemanticVad`].
    ///
    /// All other fields keep their [`VadConfig::default`] values, including
    /// `silence_duration_ms` and `interrupt_response`.
    pub fn semantic_vad() -> Self {
        let mode = VadMode::SemanticVad;
        Self { mode, ..Self::default() }
    }

    /// Returns a configuration in [`VadMode::None`].
    ///
    /// All other fields keep their [`VadConfig::default`] values, including
    /// `silence_duration_ms` and `interrupt_response`.
    pub fn disabled() -> Self {
        let mode = VadMode::None;
        Self { mode, ..Self::default() }
    }

    /// Sets `silence_duration_ms` to `Some(ms)` and returns the updated config.
    pub fn with_silence_duration(self, ms: u32) -> Self {
        Self { silence_duration_ms: Some(ms), ..self }
    }

    /// Sets `interrupt_response` to `Some(interrupt)` and returns the updated
    /// config.
    pub fn with_interrupt(self, interrupt: bool) -> Self {
        Self { interrupt_response: Some(interrupt), ..self }
    }
}
/// Description of a tool that can be attached to a [`RealtimeConfig`].
///
/// `description` and `parameters` are left out of the serialized output when
/// they are `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ToolDefinition {
    /// Tool name (always serialized).
    pub name: String,

    /// Optional description of the tool.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Optional JSON value describing the tool's parameters.
    // NOTE(review): presumably a JSON Schema object — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parameters: Option<Value>,
}
impl ToolDefinition {
    /// Creates a tool with the given name and no description or parameters.
    pub fn new(name: impl Into<String>) -> Self {
        let name: String = name.into();
        Self { name, description: None, parameters: None }
    }

    /// Sets the description and returns the updated definition.
    pub fn with_description(self, desc: impl Into<String>) -> Self {
        Self { description: Some(desc.into()), ..self }
    }

    /// Stores `schema` as the tool's parameters (as-is, without validation)
    /// and returns the updated definition.
    pub fn with_parameters(self, schema: Value) -> Self {
        Self { parameters: Some(schema), ..self }
    }
}
/// Configuration for a realtime session.
///
/// Every field is optional. A field that is `None` is omitted from the
/// serialized output, and `RealtimeConfig::default()` leaves all fields unset.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct RealtimeConfig {
    /// Model name.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

    /// Instruction text.
    // NOTE(review): presumably the system prompt for the session — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instruction: Option<String>,

    /// Voice name.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice: Option<String>,

    /// Modalities as free-form strings; the helpers on this type use
    /// `"text"` and `"audio"`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modalities: Option<Vec<String>>,

    /// Encoding of input audio.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_audio_format: Option<AudioEncoding>,

    /// Encoding of output audio.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_audio_format: Option<AudioEncoding>,

    /// Voice-activity-detection settings.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub turn_detection: Option<VadConfig>,

    /// Tools attached to the session.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<ToolDefinition>>,

    /// Tool-choice setting, kept as a free-form string.
    // NOTE(review): accepted values are not visible in this file — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<String>,

    /// Transcription settings for input audio.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_audio_transcription: Option<TranscriptionConfig>,

    /// Temperature value.
    // NOTE(review): presumably the sampling temperature — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// Upper bound on response output tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_response_output_tokens: Option<u32>,

    /// Cached-content string.
    // NOTE(review): presumably a provider-side cache reference — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_content: Option<String>,

    /// How interruptions are detected.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub interruption_detection: Option<InterruptionDetection>,

    /// Arbitrary additional JSON.
    // NOTE(review): presumably provider-specific options — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra: Option<Value>,
}
/// Newtype wrapper around [`RealtimeConfig`].
///
/// `#[serde(transparent)]` makes the wrapper serialize and deserialize exactly
/// like the inner configuration. `PartialEq` is derived so that wrapped
/// configurations can be compared just like the `RealtimeConfig` they contain.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[serde(transparent)]
pub struct SessionUpdateConfig(pub RealtimeConfig);
impl Deref for SessionUpdateConfig {
type Target = RealtimeConfig;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for SessionUpdateConfig {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl From<RealtimeConfig> for SessionUpdateConfig {
fn from(config: RealtimeConfig) -> Self {
Self(config)
}
}
/// Settings for transcribing input audio.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TranscriptionConfig {
    /// Name of the transcription model.
    pub model: String,
}
impl TranscriptionConfig {
    /// Returns a configuration whose model is `"whisper-1"`.
    pub fn whisper() -> Self {
        let model = String::from("whisper-1");
        Self { model }
    }
}
impl RealtimeConfig {
    /// Creates a configuration with every field unset.
    pub fn new() -> Self {
        Default::default()
    }

    /// Returns a [`RealtimeConfigBuilder`] starting from an empty configuration.
    pub fn builder() -> RealtimeConfigBuilder {
        RealtimeConfigBuilder::new()
    }

    /// Sets `model`.
    pub fn with_model(self, model: impl Into<String>) -> Self {
        Self { model: Some(model.into()), ..self }
    }

    /// Sets `instruction`.
    pub fn with_instruction(self, instruction: impl Into<String>) -> Self {
        Self { instruction: Some(instruction.into()), ..self }
    }

    /// Sets `voice`.
    pub fn with_voice(self, voice: impl Into<String>) -> Self {
        Self { voice: Some(voice.into()), ..self }
    }

    /// Replaces `modalities` with the given list.
    pub fn with_modalities(self, modalities: Vec<String>) -> Self {
        Self { modalities: Some(modalities), ..self }
    }

    /// Sets `modalities` to `["text", "audio"]`.
    pub fn with_text_and_audio(self) -> Self {
        let both = vec![String::from("text"), String::from("audio")];
        self.with_modalities(both)
    }

    /// Sets `modalities` to `["audio"]`.
    pub fn with_audio_only(self) -> Self {
        let audio = vec![String::from("audio")];
        self.with_modalities(audio)
    }

    /// Sets `turn_detection` to the given VAD configuration.
    pub fn with_vad(self, vad: VadConfig) -> Self {
        Self { turn_detection: Some(vad), ..self }
    }

    /// Sets `turn_detection` to [`VadConfig::server_vad`].
    pub fn with_server_vad(self) -> Self {
        Self { turn_detection: Some(VadConfig::server_vad()), ..self }
    }

    /// Sets `turn_detection` to [`VadConfig::disabled`].
    ///
    /// The field becomes `Some(..)` with the disabled mode, not `None`.
    pub fn without_vad(self) -> Self {
        self.with_vad(VadConfig::disabled())
    }

    /// Appends one tool, creating the tool list if it is not set yet.
    pub fn with_tool(mut self, tool: ToolDefinition) -> Self {
        let mut tools = self.tools.take().unwrap_or_default();
        tools.push(tool);
        Self { tools: Some(tools), ..self }
    }

    /// Replaces the whole tool list.
    pub fn with_tools(self, tools: Vec<ToolDefinition>) -> Self {
        Self { tools: Some(tools), ..self }
    }

    /// Enables input-audio transcription using [`TranscriptionConfig::whisper`].
    pub fn with_transcription(self) -> Self {
        Self { input_audio_transcription: Some(TranscriptionConfig::whisper()), ..self }
    }

    /// Sets `temperature`.
    pub fn with_temperature(self, temp: f32) -> Self {
        Self { temperature: Some(temp), ..self }
    }

    /// Sets `cached_content`.
    pub fn with_cached_content(self, content: impl Into<String>) -> Self {
        Self { cached_content: Some(content.into()), ..self }
    }

    /// Sets `interruption_detection`.
    pub fn with_interruption_detection(self, mode: InterruptionDetection) -> Self {
        Self { interruption_detection: Some(mode), ..self }
    }

    /// Sets `interruption_detection` to [`InterruptionDetection::Automatic`].
    pub fn with_automatic_interruption(self) -> Self {
        Self { interruption_detection: Some(InterruptionDetection::Automatic), ..self }
    }
}
/// Builder that accumulates settings into a [`RealtimeConfig`].
///
/// The default builder holds `RealtimeConfig::default()`, i.e. every field
/// unset.
#[derive(Debug, Clone, Default)]
pub struct RealtimeConfigBuilder {
    /// Configuration assembled so far; handed out by `build`.
    config: RealtimeConfig,
}
impl RealtimeConfigBuilder {
    /// Creates a builder whose configuration has every field unset.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the model.
    pub fn model(self, model: impl Into<String>) -> Self {
        Self { config: self.config.with_model(model) }
    }

    /// Sets the instruction text.
    pub fn instruction(self, instruction: impl Into<String>) -> Self {
        Self { config: self.config.with_instruction(instruction) }
    }

    /// Sets the voice.
    pub fn voice(self, voice: impl Into<String>) -> Self {
        Self { config: self.config.with_voice(voice) }
    }

    /// Replaces the modalities list (counterpart of
    /// [`RealtimeConfig::with_modalities`]).
    pub fn modalities(self, modalities: Vec<String>) -> Self {
        Self { config: self.config.with_modalities(modalities) }
    }

    /// Chooses between [`VadConfig::server_vad`] (`enabled == true`) and
    /// [`VadConfig::disabled`] (`enabled == false`).
    ///
    /// Either way `turn_detection` ends up `Some(..)`.
    pub fn vad_enabled(self, enabled: bool) -> Self {
        let vad = if enabled { VadConfig::server_vad() } else { VadConfig::disabled() };
        self.vad(vad)
    }

    /// Sets an explicit VAD configuration.
    pub fn vad(self, vad: VadConfig) -> Self {
        Self { config: self.config.with_vad(vad) }
    }

    /// Appends one tool, creating the tool list if it is not set yet.
    pub fn tool(self, tool: ToolDefinition) -> Self {
        Self { config: self.config.with_tool(tool) }
    }

    /// Replaces the whole tool list (counterpart of
    /// [`RealtimeConfig::with_tools`]).
    pub fn tools(self, tools: Vec<ToolDefinition>) -> Self {
        Self { config: self.config.with_tools(tools) }
    }

    /// Enables input-audio transcription (counterpart of
    /// [`RealtimeConfig::with_transcription`]).
    pub fn transcription(self) -> Self {
        Self { config: self.config.with_transcription() }
    }

    /// Sets the temperature.
    pub fn temperature(self, temp: f32) -> Self {
        Self { config: self.config.with_temperature(temp) }
    }

    /// Sets the cached-content string.
    pub fn cached_content(self, content: impl Into<String>) -> Self {
        Self { config: self.config.with_cached_content(content) }
    }

    /// Sets the interruption-detection mode.
    pub fn interruption_detection(self, mode: InterruptionDetection) -> Self {
        Self { config: self.config.with_interruption_detection(mode) }
    }

    /// Consumes the builder and returns the assembled configuration.
    pub fn build(self) -> RealtimeConfig {
        self.config
    }
}