/// Turn-detection settings for a realtime session.
///
/// Every field is optional. Several fields apply only to one turn-detection
/// mode (`server_vad` or `semantic_vad`), as noted on each field.
pub struct RealtimeSessionTurnDetection {
    /// Whether a response is generated automatically when a VAD stop event
    /// occurs.
    pub create_response: Option<bool>,

    /// `semantic_vad` mode only: how eager the model is to respond.
    ///
    /// `low` waits longer for the user to continue speaking, `high` responds
    /// more quickly. `auto` is the default and is equivalent to `medium`.
    pub eagerness: Option<String>,

    /// Whether any ongoing response with output to the default conversation
    /// (i.e. `conversation` of `auto`) is interrupted automatically when a
    /// VAD start event occurs.
    pub interrupt_response: Option<bool>,

    /// `server_vad` mode only: amount of audio, in milliseconds, to include
    /// before the speech detected by VAD. Defaults to 300ms.
    pub prefix_padding_ms: Option<i32>,

    /// `server_vad` mode only: duration of silence, in milliseconds, used to
    /// detect that speech has stopped. Defaults to 500ms.
    ///
    /// Shorter values make the model respond more quickly, but it may jump in
    /// on short pauses from the user.
    pub silence_duration_ms: Option<i32>,

    /// `server_vad` mode only: VAD activation threshold, from 0.0 to 1.0.
    /// Defaults to 0.5.
    ///
    /// A higher threshold requires louder audio to activate the model, and
    /// thus might perform better in noisy environments.
    pub threshold: Option<f32>,

    /// Type of turn detection.
    pub _type: Option<String>,
}
Fields
create_response: Option<bool>
Whether or not to automatically generate a response when a VAD stop event occurs.
eagerness: Option<String>
Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait longer for the user to continue speaking, `high` will respond more quickly. `auto` is the default and is equivalent to `medium`.
interrupt_response: Option<bool>
Whether or not to automatically interrupt any ongoing response with output to the default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs.
prefix_padding_ms: Option<i32>
Used only for `server_vad` mode. Amount of audio to include before the VAD detected speech (in milliseconds). Defaults to 300ms.
silence_duration_ms: Option<i32>
Used only for `server_vad` mode. Duration of silence to detect speech stop (in milliseconds). Defaults to 500ms. With shorter values the model will respond more quickly, but may jump in on short pauses from the user.
threshold: Option<f32>
Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0); defaults to 0.5. A higher threshold will require louder audio to activate the model, and thus might perform better in noisy environments.
_type: Option<String>
Type of turn detection.