/*
* OpenAI API
*
* The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details.
*
* The version of the OpenAPI document: 2.3.0
*
* Generated by: https://openapi-generator.tech
*/
use crate::models;
use serde::{Deserialize, Serialize};
/// RealtimeTranscriptionSessionCreateRequestTurnDetection : Configuration for turn detection. Can be set to `null` to turn off. Server VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.
// NOTE(review): `bon::Builder` derives a typed builder in addition to `new()`;
// `Default` yields a struct with every field `None` (all fields are optional
// and omitted from serialized JSON via `skip_serializing_if`).
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize, bon::Builder)]
pub struct RealtimeTranscriptionSessionCreateRequestTurnDetection {
/// Type of turn detection. Only `server_vad` is currently supported for transcription sessions.
// `type` is a Rust keyword, hence the raw identifier + serde rename.
#[serde(rename = "type", skip_serializing_if = "Option::is_none")]
pub r#type: Option<Type>,
/// Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. A higher threshold will require louder audio to activate the model, and thus might perform better in noisy environments.
#[serde(rename = "threshold", skip_serializing_if = "Option::is_none")]
pub threshold: Option<f64>,
/// Amount of audio to include before the VAD detected speech (in milliseconds). Defaults to 300ms.
#[serde(rename = "prefix_padding_ms", skip_serializing_if = "Option::is_none")]
pub prefix_padding_ms: Option<i32>,
/// Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. With shorter values the model will respond more quickly, but may jump in on short pauses from the user.
#[serde(
rename = "silence_duration_ms",
skip_serializing_if = "Option::is_none"
)]
pub silence_duration_ms: Option<i32>,
}
impl RealtimeTranscriptionSessionCreateRequestTurnDetection {
/// Configuration for turn detection. Can be set to `null` to turn off. Server VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.
pub fn new() -> RealtimeTranscriptionSessionCreateRequestTurnDetection {
RealtimeTranscriptionSessionCreateRequestTurnDetection {
r#type: None,
threshold: None,
prefix_padding_ms: None,
silence_duration_ms: None,
}
}
}
/// Type of turn detection. Only `server_vad` is currently supported for transcription sessions.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
pub enum Type {
// Serialized on the wire as the string "server_vad".
#[serde(rename = "server_vad")]
ServerVad,
}
impl Default for Type {
fn default() -> Type {
Self::ServerVad
}
}
impl std::fmt::Display for RealtimeTranscriptionSessionCreateRequestTurnDetection {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match serde_json::to_string(self) {
Ok(s) => write!(f, "{}", s),
Err(_) => Err(std::fmt::Error),
}
}
}