Skip to main content

openai_tools/realtime/
session.rs

1//! Session configuration types for the Realtime API.
2
3use crate::common::parameters::{Name, ParameterProperty, Parameters};
4use crate::common::tool::Tool;
5use serde::{Deserialize, Serialize};
6
7use super::audio::{AudioFormat, InputAudioNoiseReduction, InputAudioTranscription, Voice};
8use super::vad::TurnDetection;
9
/// Tool definition for the Realtime API.
///
/// The Realtime API uses a flattened tool format, unlike the Chat Completions API
/// which nests the function details under a `function` key. In this type the
/// `type`, `name`, `description` and `parameters` keys all sit at the top level
/// of the serialized object.
///
/// `description` and `parameters` are omitted from the serialized output when
/// they are `None`.
///
/// # Example
///
/// ```rust
/// use openai_tools::realtime::RealtimeTool;
/// use openai_tools::common::parameters::ParameterProperty;
///
/// let tool = RealtimeTool::function(
///     "get_weather",
///     "Get the current weather for a location",
///     vec![("location", ParameterProperty::from_string("The city name"))],
/// );
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RealtimeTool {
    /// The type of tool (always "function" for function calling).
    ///
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub type_name: String,

    /// The name of the function.
    pub name: String,

    /// A description of what the function does.
    ///
    /// Skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// The parameters the function accepts.
    ///
    /// Skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parameters: Option<Parameters>,
}
44
45impl RealtimeTool {
46    /// Create a new function tool.
47    pub fn function<T, U, V>(name: T, description: U, parameters: Vec<(V, ParameterProperty)>) -> Self
48    where
49        T: Into<String>,
50        U: Into<String>,
51        V: AsRef<str>,
52    {
53        let params: Vec<(Name, ParameterProperty)> = parameters.into_iter().map(|(k, v)| (k.as_ref().to_string(), v)).collect();
54
55        Self {
56            type_name: "function".to_string(),
57            name: name.into(),
58            description: Some(description.into()),
59            parameters: Some(Parameters::new(params, None)),
60        }
61    }
62}
63
64impl From<Tool> for RealtimeTool {
65    /// Convert a Chat API tool to a Realtime API tool.
66    fn from(tool: Tool) -> Self {
67        if let Some(func) = tool.function {
68            Self { type_name: "function".to_string(), name: func.name, description: func.description, parameters: func.parameters }
69        } else {
70            // Fallback for tools without function definition
71            Self { type_name: tool.type_name, name: tool.name.unwrap_or_default(), description: None, parameters: tool.parameters }
72        }
73    }
74}
75
/// Session modality - what types of input/output are supported.
///
/// Variants are serialized as lowercase strings (`"text"`, `"audio"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Modality {
    /// Text input/output
    Text,
    /// Audio input/output
    Audio,
}
85
/// Session configuration sent in session.update events.
///
/// Every field is optional. Fields left as `None` are skipped during
/// serialization, so only the settings that were explicitly provided appear
/// in the serialized payload. `SessionConfig::default()` yields a
/// configuration with every field unset.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SessionConfig {
    /// Supported modalities for this session.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modalities: Option<Vec<Modality>>,

    /// System instructions for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,

    /// Voice for audio output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice: Option<Voice>,

    /// Format for input audio.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_audio_format: Option<AudioFormat>,

    /// Format for output audio.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_audio_format: Option<AudioFormat>,

    /// Configuration for input audio transcription.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_audio_transcription: Option<InputAudioTranscription>,

    /// Noise reduction configuration.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_audio_noise_reduction: Option<InputAudioNoiseReduction>,

    /// Turn detection configuration.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub turn_detection: Option<TurnDetection>,

    /// Available tools for function calling.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<RealtimeTool>>,

    /// How to select tools.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,

    /// Sampling temperature (0.6 to 1.2).
    ///
    /// The range is not checked by this type.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// Maximum tokens in a response.
    ///
    /// Either a concrete count or the "infinite" marker; see [`MaxTokens`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_response_output_tokens: Option<MaxTokens>,
}
137
138impl SessionConfig {
139    /// Create a new empty session configuration.
140    pub fn new() -> Self {
141        Self::default()
142    }
143
144    /// Set the modalities.
145    pub fn with_modalities(mut self, modalities: Vec<Modality>) -> Self {
146        self.modalities = Some(modalities);
147        self
148    }
149
150    /// Set the instructions.
151    pub fn with_instructions(mut self, instructions: impl Into<String>) -> Self {
152        self.instructions = Some(instructions.into());
153        self
154    }
155
156    /// Set the voice.
157    pub fn with_voice(mut self, voice: Voice) -> Self {
158        self.voice = Some(voice);
159        self
160    }
161
162    /// Set the input audio format.
163    pub fn with_input_audio_format(mut self, format: AudioFormat) -> Self {
164        self.input_audio_format = Some(format);
165        self
166    }
167
168    /// Set the output audio format.
169    pub fn with_output_audio_format(mut self, format: AudioFormat) -> Self {
170        self.output_audio_format = Some(format);
171        self
172    }
173
174    /// Set the transcription configuration.
175    pub fn with_transcription(mut self, config: InputAudioTranscription) -> Self {
176        self.input_audio_transcription = Some(config);
177        self
178    }
179
180    /// Set the turn detection configuration.
181    pub fn with_turn_detection(mut self, config: TurnDetection) -> Self {
182        self.turn_detection = Some(config);
183        self
184    }
185
186    /// Set the available tools.
187    ///
188    /// Accepts `Tool` from the common module and converts to `RealtimeTool`.
189    pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
190        self.tools = Some(tools.into_iter().map(RealtimeTool::from).collect());
191        self
192    }
193
194    /// Set the available realtime tools directly.
195    pub fn with_realtime_tools(mut self, tools: Vec<RealtimeTool>) -> Self {
196        self.tools = Some(tools);
197        self
198    }
199
200    /// Set the tool choice.
201    pub fn with_tool_choice(mut self, choice: ToolChoice) -> Self {
202        self.tool_choice = Some(choice);
203        self
204    }
205
206    /// Set the temperature.
207    pub fn with_temperature(mut self, temp: f32) -> Self {
208        self.temperature = Some(temp);
209        self
210    }
211
212    /// Set the maximum response tokens.
213    pub fn with_max_tokens(mut self, max: MaxTokens) -> Self {
214        self.max_response_output_tokens = Some(max);
215        self
216    }
217}
218
/// Maximum tokens configuration.
///
/// Either a concrete token count or an explicit "no limit" marker.
/// The type is a small plain value, so it is `Copy` and supports equality
/// comparison, matching the derives on the sibling `Modality` enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MaxTokens {
    /// Specific token count limit.
    Count(u32),
    /// No limit (infinite).
    Infinite,
}
227
228impl serde::Serialize for MaxTokens {
229    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
230    where
231        S: serde::Serializer,
232    {
233        match self {
234            MaxTokens::Count(n) => serializer.serialize_u32(*n),
235            MaxTokens::Infinite => serializer.serialize_str("inf"),
236        }
237    }
238}
239
240impl<'de> serde::Deserialize<'de> for MaxTokens {
241    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
242    where
243        D: serde::Deserializer<'de>,
244    {
245        use serde::de::{self, Visitor};
246
247        struct MaxTokensVisitor;
248
249        impl<'de> Visitor<'de> for MaxTokensVisitor {
250            type Value = MaxTokens;
251
252            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
253                formatter.write_str("a positive integer or \"inf\"")
254            }
255
256            fn visit_u64<E>(self, value: u64) -> std::result::Result<MaxTokens, E>
257            where
258                E: de::Error,
259            {
260                Ok(MaxTokens::Count(value as u32))
261            }
262
263            fn visit_str<E>(self, value: &str) -> std::result::Result<MaxTokens, E>
264            where
265                E: de::Error,
266            {
267                if value == "inf" {
268                    Ok(MaxTokens::Infinite)
269                } else {
270                    Err(de::Error::custom(format!("unknown value: {}", value)))
271                }
272            }
273        }
274
275        deserializer.deserialize_any(MaxTokensVisitor)
276    }
277}
278
279impl From<u32> for MaxTokens {
280    fn from(count: u32) -> Self {
281        Self::Count(count)
282    }
283}
284
/// How to select tools for function calling.
///
/// The enum is untagged: the serialized form is just the inner value, with no
/// wrapper naming the variant. During deserialization the variants are tried
/// in declaration order.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolChoice {
    /// Simple string-based choices: "auto", "none", "required"
    Simple(SimpleToolChoice),
    /// Force a specific function by name
    Function(NamedToolChoice),
}
294
/// Simple tool choice options.
///
/// Variants are serialized as lowercase strings (`"auto"`, `"none"`, `"required"`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum SimpleToolChoice {
    /// Model decides whether to use tools.
    Auto,
    /// Never use tools.
    None,
    /// Must use a tool.
    Required,
}
306
307impl Default for ToolChoice {
308    fn default() -> Self {
309        Self::Simple(SimpleToolChoice::Auto)
310    }
311}
312
313impl ToolChoice {
314    /// Model decides whether to use tools.
315    pub fn auto() -> Self {
316        Self::Simple(SimpleToolChoice::Auto)
317    }
318
319    /// Never use tools.
320    pub fn none() -> Self {
321        Self::Simple(SimpleToolChoice::None)
322    }
323
324    /// Must use a tool.
325    pub fn required() -> Self {
326        Self::Simple(SimpleToolChoice::Required)
327    }
328
329    /// Force a specific function by name.
330    pub fn function(name: impl Into<String>) -> Self {
331        Self::Function(NamedToolChoice { type_name: "function".to_string(), function: NamedFunction { name: name.into() } })
332    }
333}
334
/// Named tool choice for forcing a specific function.
///
/// Serializes as an object with a `type` key and a nested `function` object.
// NOTE(review): `RealtimeTool` documents the Realtime tool format as flattened
// (no `function` nesting). Confirm against the API reference that a named tool
// choice is expected in this nested shape rather than `{ "type", "name" }`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NamedToolChoice {
    /// The kind of tool being selected; serialized under the key `type`.
    #[serde(rename = "type")]
    pub type_name: String,
    /// The function to force, identified by name.
    pub function: NamedFunction,
}
342
/// Function name for named tool choice.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NamedFunction {
    /// Name of the function to call.
    pub name: String,
}
348
/// Response creation configuration.
///
/// Every field is optional. Fields left as `None` are skipped during
/// serialization, so only explicitly provided settings appear in the
/// serialized payload.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ResponseCreateConfig {
    /// Modalities for this response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modalities: Option<Vec<Modality>>,

    /// Instructions for this response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,

    /// Voice for this response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice: Option<Voice>,

    /// Output audio format.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_audio_format: Option<AudioFormat>,

    /// Tools available for this response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<RealtimeTool>>,

    /// Tool choice for this response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,

    /// Temperature for this response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// Maximum output tokens.
    ///
    /// Either a concrete count or the "infinite" marker; see [`MaxTokens`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<MaxTokens>,

    /// Whether to include in conversation history.
    /// Set to "none" to exclude.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub conversation: Option<String>,

    /// Metadata for this response.
    ///
    /// Stored as an arbitrary JSON value; this type imposes no structure on it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<serde_json::Value>,
}
393
394impl ResponseCreateConfig {
395    /// Create a new empty response configuration.
396    pub fn new() -> Self {
397        Self::default()
398    }
399
400    /// Set the modalities.
401    pub fn with_modalities(mut self, modalities: Vec<Modality>) -> Self {
402        self.modalities = Some(modalities);
403        self
404    }
405
406    /// Set the instructions.
407    pub fn with_instructions(mut self, instructions: impl Into<String>) -> Self {
408        self.instructions = Some(instructions.into());
409        self
410    }
411
412    /// Set the voice.
413    pub fn with_voice(mut self, voice: Voice) -> Self {
414        self.voice = Some(voice);
415        self
416    }
417
418    /// Exclude this response from conversation history.
419    pub fn out_of_band(mut self) -> Self {
420        self.conversation = Some("none".to_string());
421        self
422    }
423}