async_openai/types/realtime/
session_resource.rs

1use serde::{Deserialize, Serialize};
2
3#[derive(Debug, Serialize, Deserialize, Clone)]
4pub enum AudioFormat {
5    #[serde(rename = "pcm16")]
6    PCM16,
7    #[serde(rename = "g711-ulaw")]
8    G711ULAW,
9    #[serde(rename = "g711-alaw")]
10    G711ALAW,
11}
12
13#[derive(Debug, Serialize, Deserialize, Clone)]
14pub struct AudioTranscription {
15    /// Whether to enable input audio transcription.
16    pub enabled: bool,
17    /// The model to use for transcription (e.g., "whisper-1").
18    pub model: String,
19}
20
21#[derive(Debug, Serialize, Deserialize, Clone)]
22#[serde(tag = "type")]
23pub enum TurnDetection {
24    /// Type of turn detection, only "server_vad" is currently supported.
25    #[serde(rename = "server_vad")]
26    ServerVAD {
27        /// Activation threshold for VAD (0.0 to 1.0).
28        threshold: f32,
29        /// Amount of audio to include before speech starts (in milliseconds).
30        prefix_padding_ms: u32,
31        /// Duration of silence to detect speech stop (in milliseconds).
32        silence_duration_ms: u32,
33    },
34}
35
36#[derive(Debug, Serialize, Deserialize, Clone)]
37pub enum MaxResponseOutputTokens {
38    #[serde(rename = "inf")]
39    Inf,
40    #[serde(untagged)]
41    Num(u16),
42}
43
44#[derive(Debug, Serialize, Deserialize, Clone)]
45#[serde(tag = "type")]
46pub enum ToolDefinition {
47    #[serde(rename = "function")]
48    Function {
49        /// The name of the function.
50        name: String,
51        /// The description of the function.
52        description: String,
53        /// Parameters of the function in JSON Schema.
54        parameters: serde_json::Value,
55    },
56}
57
58#[derive(Debug, Serialize, Deserialize, Clone)]
59#[serde(rename_all = "lowercase")]
60pub enum FunctionType {
61    Function,
62}
63
64#[derive(Debug, Serialize, Deserialize, Clone)]
65#[serde(rename_all = "lowercase")]
66pub enum ToolChoice {
67    Auto,
68    None,
69    Required,
70    #[serde(untagged)]
71    Function {
72        r#type: FunctionType,
73        name: String,
74    },
75}
76
77#[derive(Debug, Serialize, Deserialize, Clone)]
78#[serde(rename_all = "lowercase")]
79pub enum RealtimeVoice {
80    Alloy,
81    Shimmer,
82    Echo,
83}
84
85#[derive(Debug, Serialize, Deserialize, Clone, Default)]
86pub struct SessionResource {
87    /// The default model used for this session.
88    #[serde(skip_serializing_if = "Option::is_none")]
89    pub model: Option<String>,
90
91    /// The set of modalities the model can respond with. To disable audio, set this to ["text"].
92    #[serde(skip_serializing_if = "Option::is_none")]
93    pub modalities: Option<Vec<String>>,
94
95    //// The default system instructions prepended to model calls.
96    #[serde(skip_serializing_if = "Option::is_none")]
97    pub instructions: Option<String>,
98
99    /// The voice the model uses to respond. Cannot be changed once the model has responded with audio at least once.
100    #[serde(skip_serializing_if = "Option::is_none")]
101    pub voice: Option<RealtimeVoice>,
102
103    /// The format of input audio. Options are "pcm16", "g711_ulaw", or "g711_alaw".
104    #[serde(skip_serializing_if = "Option::is_none")]
105    pub input_audio_format: Option<AudioFormat>,
106
107    /// The format of output audio. Options are "pcm16", "g711_ulaw", or "g711_alaw".
108    #[serde(skip_serializing_if = "Option::is_none")]
109    pub output_audio_format: Option<AudioFormat>,
110
111    /// Configuration for input audio transcription. Can be set to null to turn off.
112    #[serde(skip_serializing_if = "Option::is_none")]
113    pub input_audio_transcription: Option<AudioTranscription>,
114
115    /// Configuration for turn detection. Can be set to null to turn off.
116    #[serde(skip_serializing_if = "Option::is_none")]
117    pub turn_detection: Option<TurnDetection>,
118
119    /// Tools (functions) available to the model.
120    #[serde(skip_serializing_if = "Option::is_none")]
121    pub tools: Option<Vec<ToolDefinition>>,
122
123    #[serde(skip_serializing_if = "Option::is_none")]
124    /// How the model chooses tools.
125    pub tool_choice: Option<ToolChoice>,
126
127    #[serde(skip_serializing_if = "Option::is_none")]
128    /// Sampling temperature for the model.
129    pub temperature: Option<f32>,
130
131    /// Maximum number of output tokens for a single assistant response, inclusive of tool calls.
132    /// Provide an integer between 1 and 4096 to limit output tokens, or "inf" for the maximum available tokens for a given model.
133    /// Defaults to "inf".
134    #[serde(skip_serializing_if = "Option::is_none")]
135    pub max_response_output_tokens: Option<MaxResponseOutputTokens>,
136}