//! Example of appending audio to the input buffer using the OpenAI API.
//!
//! Run with:
//! `cargo run --example realtime_input_audio_buffer_append`
//!
//! Make sure you have set the `OPENAI_API_KEY` environment variable
//! or have a `secret/-secret.sh` file with the key.
//!
//! **NOTE:** This event does not have a direct confirmation server event like
//! `input_audio_buffer.appended`. Confirmation is implicit or through subsequent
//! events like VAD detection or transcription results. This example sends the
//! data and waits briefly for any follow-up events.
use api_openai::ClientApiAccessors;
use api_openai::
{
client ::Client,
error ::OpenAIError,
api ::realtime::{ RealtimeClient, ws::WsSession },
components ::realtime_shared::
{
RealtimeSessionCreateRequest,
RealtimeClientEventInputAudioBufferAppend,
},
};
use tracing_subscriber::{ EnvFilter, fmt }; // Added for logging
use base64::{ engine::general_purpose::STANDARD as base64_engine, Engine as _ }; // For base64 encoding
#[ tokio::main( flavor = "current_thread" ) ]
async fn main() -> Result< (), OpenAIError >
{
  // Wire up tracing so crate-internal logs are visible at trace level.
  fmt()
  .with_env_filter( EnvFilter::from_default_env().add_directive( "api_openai=trace".parse().unwrap() ) )
  .init();

  // Pull the API key from the secret file if present; ignore if absent.
  dotenv::from_filename( "./secret/-secret.sh" ).ok();

  // 1. Build the OpenAI client from the environment.
  tracing::info!( "Initializing client..." );
  let api_client = Client::new();

  // 2. Describe the realtime session we want: pcm16 audio input.
  tracing::info!( "Building realtime session request..." );
  let session_request = RealtimeSessionCreateRequest::former()
  .model( "gpt-4o-realtime-preview".to_string() )
  .input_audio_format( "pcm16" ) // Format of the audio chunks we will append
  // Optional : enable transcription to see recognition results
  // .input_audio_transcription( RealtimeSessionInputAudioTranscription::former().model( "whisper-1" ).form() )
  // Optional : enable server-side voice activity detection
  // .turn_detection( RealtimeSessionTurnDetection::former().r#type( "server_vad" ).form() )
  .temperature( 0.7 )
  .form();

  // 3. Ask the REST API for an ephemeral session.
  tracing::info!( "Sending request to OpenAI API to create session..." );
  let session = api_client.realtime().create( session_request ).await?;

  // 4. Open the realtime WebSocket using the session's client secret.
  tracing::info!( "Creating Realtime WebSocket Session Client..." );
  let token = session.client_secret.value;
  let ws = WsSession::connect( api_client.environment().clone(), Some( &token ) ).await?;

  // Encode the bundled sample audio as base64, as the append event requires.
  // NOTE(review): the sample is a .wav file but the session format is "pcm16",
  // so the RIFF header bytes go over the wire verbatim; acceptable for a demo,
  // but real code should strip the header first — TODO confirm.
  let sample_wav = include_bytes!( "data/example.wav" );
  let encoded_audio = base64_engine.encode( &sample_wav );

  // 5. Build the input_audio_buffer.append client event.
  let append_event = RealtimeClientEventInputAudioBufferAppend::former()
  .audio( encoded_audio ) // Base64-encoded audio payload
  .form();

  // 6. Ship the append event over the WebSocket.
  tracing::info!( "Sending input_audio_buffer.append event..." );
  ws.input_audio_buffer_append( append_event ).await?;
  tracing::info!( "Audio append event sent." );

  // 7. There is no `input_audio_buffer.appended` confirmation event, so just
  // listen for a short window in case VAD / transcription events arrive.
  tracing::info!( "Waiting briefly for any subsequent events (no direct confirmation expected)..." );
  let listen_window = tokio::time::Duration::from_secs( 2 ); // Overall listening window
  let poll_timeout = tokio::time::Duration::from_millis( 100 ); // Per-read timeout
  let started_at = tokio::time::Instant::now();
  loop
  {
    // Stop once the overall listening window has passed.
    if started_at.elapsed() > listen_window
    {
      println!( "\nWait duration elapsed. No specific confirmation event for append." );
      break;
    }
    // Poll with a short per-read timeout so the deadline above is re-checked.
    match tokio::time::timeout( poll_timeout, ws.read_event() ).await
    {
      // Per-read timeout elapsed — loop around and re-check the deadline.
      Err( _elapsed ) => {},
      Ok( Ok( Some( event ) ) ) =>
      {
        println!( "\n--- Received Subsequent Event ---" );
        println!( "{event:?}" );
        // Depending on session config, you might see:
        // - InputAudioBufferSpeechStarted/Stopped
        // - ConversationItemInputAudioTranscriptionDelta/Completed
        // etc.
      },
      Ok( Ok( None ) ) =>
      {
        println!( "\nWebSocket connection closed by server unexpectedly." );
        return Err( OpenAIError::WsConnectionClosed );
      },
      Ok( Err( e ) ) =>
      {
        eprintln!( "\nError reading from WebSocket : {:?}", e );
        return Err( e );
      },
    }
  }

  // No direct confirmation exists; a clean send counts as success.
  println!( "Successfully sent input_audio_buffer.append event." );
  Ok( () )
}