openai_realtime/
agent.rs

1use crate::api::model::Model;
2use crate::{
3    AudioFormat, Modality, SessionUpdateEvent, TurnDetection, Voice, WebsocketConfig, websocket,
4};
5use std::sync::Arc;
6use tokio::sync::mpsc::UnboundedReceiver;
7
8#[derive(Debug, Clone, Default)]
9pub struct AgentConfig {
10    pub model: Option<Model>,
11    pub voice: Option<Voice>,
12    pub speed: Option<f32>,
13    pub instructions: Option<String>,
14}
15
16pub async fn connect_realtime_agent(
17    config: AgentConfig,
18) -> anyhow::Result<(Arc<websocket::RealtimeSession>, UnboundedReceiver<Vec<u8>>)> {
19    let voice = config.voice.unwrap_or(Voice::Echo);
20    let model = config.model.unwrap_or(Model::default());
21
22    // create a new realtime agent
23    let rt_config = WebsocketConfig {
24        model,
25        ..Default::default()
26    };
27    if rt_config.api_key_ref.api_key().is_empty() {
28        Err(anyhow::anyhow!(
29            "invalid api key ref: {}",
30            rt_config.api_key_ref
31        ))?;
32    }
33
34    let (rt_client, rx_audio) = websocket::connect(rt_config).await.unwrap();
35
36    let instructions = config.instructions.unwrap_or(
37        r###"
38You are Melissa, a helpful customer support agent.
39You language is en-US.
40"###
41        .to_string(),
42    );
43
44    rt_client.session_update(SessionUpdateEvent {
45        temperature: 0.7.into(),
46        instructions: instructions.into(),
47        speed: config.speed,
48        voice: voice.clone().into(),
49        modalities: vec![Modality::Audio, Modality::Text].into(),
50        turn_detection: TurnDetection {
51            create_response: true,
52            interrupt_response: false,
53            prefix_padding_ms: 300,
54            silence_duration_ms: 1000,
55            td_type: "server_vad".into(),
56            threshold: 0.5,
57        }
58        .into(),
59        input_audio_format: Some(AudioFormat::PCM16),
60        output_audio_format: Some(AudioFormat::PCM16),
61        ..Default::default()
62    })?;
63
64    Ok((rt_client, rx_audio))
65}