1use crate::api::model::Model;
2use crate::{
3 AudioFormat, Modality, SessionUpdateEvent, TurnDetection, Voice, WebsocketConfig, websocket,
4};
5use std::sync::Arc;
6use tokio::sync::mpsc::UnboundedReceiver;
7
8#[derive(Debug, Clone, Default)]
9pub struct AgentConfig {
10 pub model: Option<Model>,
11 pub voice: Option<Voice>,
12 pub speed: Option<f32>,
13 pub instructions: Option<String>,
14}
15
16pub async fn connect_realtime_agent(
17 config: AgentConfig,
18) -> anyhow::Result<(Arc<websocket::RealtimeSession>, UnboundedReceiver<Vec<u8>>)> {
19 let voice = config.voice.unwrap_or(Voice::Echo);
20 let model = config.model.unwrap_or(Model::default());
21
22 let rt_config = WebsocketConfig {
24 model,
25 ..Default::default()
26 };
27 if rt_config.api_key_ref.api_key().is_empty() {
28 Err(anyhow::anyhow!(
29 "invalid api key ref: {}",
30 rt_config.api_key_ref
31 ))?;
32 }
33
34 let (rt_client, rx_audio) = websocket::connect(rt_config).await.unwrap();
35
36 let instructions = config.instructions.unwrap_or(
37 r###"
38You are Melissa, a helpful customer support agent.
39You language is en-US.
40"###
41 .to_string(),
42 );
43
44 rt_client.session_update(SessionUpdateEvent {
45 temperature: 0.7.into(),
46 instructions: instructions.into(),
47 speed: config.speed,
48 voice: voice.clone().into(),
49 modalities: vec![Modality::Audio, Modality::Text].into(),
50 turn_detection: TurnDetection {
51 create_response: true,
52 interrupt_response: false,
53 prefix_padding_ms: 300,
54 silence_duration_ms: 1000,
55 td_type: "server_vad".into(),
56 threshold: 0.5,
57 }
58 .into(),
59 input_audio_format: Some(AudioFormat::PCM16),
60 output_audio_format: Some(AudioFormat::PCM16),
61 ..Default::default()
62 })?;
63
64 Ok((rt_client, rx_audio))
65}