1use crate::audio::AudioEncoding;
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use std::ops::{Deref, DerefMut};
7
8#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
21#[serde(rename_all = "snake_case")]
22pub enum InterruptionDetection {
23 #[default]
29 Manual,
30 Automatic,
37}
38
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
41#[serde(rename_all = "snake_case")]
42pub enum VadMode {
43 #[default]
45 ServerVad,
46 SemanticVad,
48 None,
50}
51
52#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
54pub struct VadConfig {
55 #[serde(rename = "type")]
57 pub mode: VadMode,
58 #[serde(skip_serializing_if = "Option::is_none")]
60 pub silence_duration_ms: Option<u32>,
61 #[serde(skip_serializing_if = "Option::is_none")]
63 pub threshold: Option<f32>,
64 #[serde(skip_serializing_if = "Option::is_none")]
66 pub prefix_padding_ms: Option<u32>,
67 #[serde(skip_serializing_if = "Option::is_none")]
69 pub interrupt_response: Option<bool>,
70 #[serde(skip_serializing_if = "Option::is_none")]
72 pub eagerness: Option<String>,
73}
74
75impl Default for VadConfig {
76 fn default() -> Self {
77 Self {
78 mode: VadMode::ServerVad,
79 silence_duration_ms: Some(500),
80 threshold: None,
81 prefix_padding_ms: None,
82 interrupt_response: Some(true),
83 eagerness: None,
84 }
85 }
86}
87
88impl VadConfig {
89 pub fn server_vad() -> Self {
91 Self::default()
92 }
93
94 pub fn semantic_vad() -> Self {
96 Self { mode: VadMode::SemanticVad, ..Default::default() }
97 }
98
99 pub fn disabled() -> Self {
101 Self { mode: VadMode::None, ..Default::default() }
102 }
103
104 pub fn with_silence_duration(mut self, ms: u32) -> Self {
106 self.silence_duration_ms = Some(ms);
107 self
108 }
109
110 pub fn with_interrupt(mut self, interrupt: bool) -> Self {
112 self.interrupt_response = Some(interrupt);
113 self
114 }
115}
116
117#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
119pub struct ToolDefinition {
120 pub name: String,
122 #[serde(skip_serializing_if = "Option::is_none")]
124 pub description: Option<String>,
125 #[serde(skip_serializing_if = "Option::is_none")]
127 pub parameters: Option<Value>,
128}
129
130impl ToolDefinition {
131 pub fn new(name: impl Into<String>) -> Self {
133 Self { name: name.into(), description: None, parameters: None }
134 }
135
136 pub fn with_description(mut self, desc: impl Into<String>) -> Self {
138 self.description = Some(desc.into());
139 self
140 }
141
142 pub fn with_parameters(mut self, schema: Value) -> Self {
144 self.parameters = Some(schema);
145 self
146 }
147}
148
149#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
151pub struct RealtimeConfig {
152 #[serde(skip_serializing_if = "Option::is_none")]
154 pub model: Option<String>,
155
156 #[serde(skip_serializing_if = "Option::is_none")]
158 pub instruction: Option<String>,
159
160 #[serde(skip_serializing_if = "Option::is_none")]
162 pub voice: Option<String>,
163
164 #[serde(skip_serializing_if = "Option::is_none")]
166 pub modalities: Option<Vec<String>>,
167
168 #[serde(skip_serializing_if = "Option::is_none")]
170 pub input_audio_format: Option<AudioEncoding>,
171
172 #[serde(skip_serializing_if = "Option::is_none")]
174 pub output_audio_format: Option<AudioEncoding>,
175
176 #[serde(skip_serializing_if = "Option::is_none")]
178 pub turn_detection: Option<VadConfig>,
179
180 #[serde(skip_serializing_if = "Option::is_none")]
182 pub tools: Option<Vec<ToolDefinition>>,
183
184 #[serde(skip_serializing_if = "Option::is_none")]
186 pub tool_choice: Option<String>,
187
188 #[serde(skip_serializing_if = "Option::is_none")]
190 pub input_audio_transcription: Option<TranscriptionConfig>,
191
192 #[serde(skip_serializing_if = "Option::is_none")]
194 pub temperature: Option<f32>,
195
196 #[serde(skip_serializing_if = "Option::is_none")]
198 pub max_response_output_tokens: Option<u32>,
199
200 #[serde(skip_serializing_if = "Option::is_none")]
202 pub cached_content: Option<String>,
203
204 #[serde(skip_serializing_if = "Option::is_none")]
210 pub interruption_detection: Option<InterruptionDetection>,
211
212 #[serde(skip_serializing_if = "Option::is_none")]
214 pub extra: Option<Value>,
215}
216
217#[derive(Debug, Clone, Default, Serialize, Deserialize)]
242#[serde(transparent)]
243pub struct SessionUpdateConfig(pub RealtimeConfig);
244
245impl Deref for SessionUpdateConfig {
246 type Target = RealtimeConfig;
247
248 fn deref(&self) -> &Self::Target {
249 &self.0
250 }
251}
252
253impl DerefMut for SessionUpdateConfig {
254 fn deref_mut(&mut self) -> &mut Self::Target {
255 &mut self.0
256 }
257}
258
259impl From<RealtimeConfig> for SessionUpdateConfig {
260 fn from(config: RealtimeConfig) -> Self {
261 Self(config)
262 }
263}
264
265#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
267pub struct TranscriptionConfig {
268 pub model: String,
270}
271
272impl TranscriptionConfig {
273 pub fn whisper() -> Self {
275 Self { model: "whisper-1".to_string() }
276 }
277}
278
279impl RealtimeConfig {
280 pub fn new() -> Self {
282 Self::default()
283 }
284
285 pub fn builder() -> RealtimeConfigBuilder {
287 RealtimeConfigBuilder::new()
288 }
289
290 pub fn with_model(mut self, model: impl Into<String>) -> Self {
292 self.model = Some(model.into());
293 self
294 }
295
296 pub fn with_instruction(mut self, instruction: impl Into<String>) -> Self {
298 self.instruction = Some(instruction.into());
299 self
300 }
301
302 pub fn with_voice(mut self, voice: impl Into<String>) -> Self {
304 self.voice = Some(voice.into());
305 self
306 }
307
308 pub fn with_modalities(mut self, modalities: Vec<String>) -> Self {
310 self.modalities = Some(modalities);
311 self
312 }
313
314 pub fn with_text_and_audio(mut self) -> Self {
316 self.modalities = Some(vec!["text".to_string(), "audio".to_string()]);
317 self
318 }
319
320 pub fn with_audio_only(mut self) -> Self {
322 self.modalities = Some(vec!["audio".to_string()]);
323 self
324 }
325
326 pub fn with_vad(mut self, vad: VadConfig) -> Self {
328 self.turn_detection = Some(vad);
329 self
330 }
331
332 pub fn with_server_vad(self) -> Self {
334 self.with_vad(VadConfig::server_vad())
335 }
336
337 pub fn without_vad(mut self) -> Self {
339 self.turn_detection = Some(VadConfig::disabled());
340 self
341 }
342
343 pub fn with_tool(mut self, tool: ToolDefinition) -> Self {
345 self.tools.get_or_insert_with(Vec::new).push(tool);
346 self
347 }
348
349 pub fn with_tools(mut self, tools: Vec<ToolDefinition>) -> Self {
351 self.tools = Some(tools);
352 self
353 }
354
355 pub fn with_transcription(mut self) -> Self {
357 self.input_audio_transcription = Some(TranscriptionConfig::whisper());
358 self
359 }
360
361 pub fn with_temperature(mut self, temp: f32) -> Self {
363 self.temperature = Some(temp);
364 self
365 }
366
367 pub fn with_cached_content(mut self, content: impl Into<String>) -> Self {
369 self.cached_content = Some(content.into());
370 self
371 }
372
373 pub fn with_interruption_detection(mut self, mode: InterruptionDetection) -> Self {
377 self.interruption_detection = Some(mode);
378 self
379 }
380
381 pub fn with_automatic_interruption(self) -> Self {
386 self.with_interruption_detection(InterruptionDetection::Automatic)
387 }
388}
389
390#[derive(Debug, Clone, Default)]
392pub struct RealtimeConfigBuilder {
393 config: RealtimeConfig,
394}
395
396impl RealtimeConfigBuilder {
397 pub fn new() -> Self {
399 Self::default()
400 }
401
402 pub fn model(mut self, model: impl Into<String>) -> Self {
404 self.config.model = Some(model.into());
405 self
406 }
407
408 pub fn instruction(mut self, instruction: impl Into<String>) -> Self {
410 self.config.instruction = Some(instruction.into());
411 self
412 }
413
414 pub fn voice(mut self, voice: impl Into<String>) -> Self {
416 self.config.voice = Some(voice.into());
417 self
418 }
419
420 pub fn vad_enabled(mut self, enabled: bool) -> Self {
422 if enabled {
423 self.config.turn_detection = Some(VadConfig::server_vad());
424 } else {
425 self.config.turn_detection = Some(VadConfig::disabled());
426 }
427 self
428 }
429
430 pub fn vad(mut self, vad: VadConfig) -> Self {
432 self.config.turn_detection = Some(vad);
433 self
434 }
435
436 pub fn tool(mut self, tool: ToolDefinition) -> Self {
438 self.config.tools.get_or_insert_with(Vec::new).push(tool);
439 self
440 }
441
442 pub fn temperature(mut self, temp: f32) -> Self {
444 self.config.temperature = Some(temp);
445 self
446 }
447
448 pub fn cached_content(mut self, content: impl Into<String>) -> Self {
450 self.config.cached_content = Some(content.into());
451 self
452 }
453
454 pub fn interruption_detection(mut self, mode: InterruptionDetection) -> Self {
456 self.config.interruption_detection = Some(mode);
457 self
458 }
459
460 pub fn build(self) -> RealtimeConfig {
462 self.config
463 }
464}