// adk_realtime/events.rs

//! Event types for realtime communication.
//!
//! These events follow a unified model inspired by the OpenAI Agents SDK,
//! abstracting over provider-specific event formats.
//!
//! Audio data is transported as raw bytes (`Vec<u8>`) internally but serialized
//! as base64 on the wire for JSON compatibility.

use base64::Engine;
use serde::{Deserialize, Serialize};
use serde_json::Value;

13// ── Custom serde for base64-encoded audio ───────────────────────────────
14
15fn deserialize_audio_bytes<'de, D>(deserializer: D) -> Result<Vec<u8>, D::Error>
16where
17    D: serde::Deserializer<'de>,
18{
19    let s = String::deserialize(deserializer)?;
20    base64::engine::general_purpose::STANDARD.decode(&s).map_err(serde::de::Error::custom)
21}
22
23fn serialize_audio_bytes<S>(bytes: &[u8], serializer: S) -> Result<S::Ok, S::Error>
24where
25    S: serde::Serializer,
26{
27    let s = base64::engine::general_purpose::STANDARD.encode(bytes);
28    serializer.serialize_str(&s)
29}
30
31// ── Client Events ───────────────────────────────────────────────────────
32
33/// Events sent from the client to the realtime server.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35#[serde(tag = "type")]
36pub enum ClientEvent {
37    /// Update session configuration.
38    #[serde(rename = "session.update")]
39    SessionUpdate {
40        /// Updated session configuration.
41        session: Value,
42    },
43
44    /// Append audio to the input buffer.
45    #[serde(rename = "input_audio_buffer.append")]
46    AudioDelta {
47        /// Optional event ID.
48        #[serde(skip_serializing_if = "Option::is_none")]
49        event_id: Option<String>,
50        /// Audio data (raw bytes, serialized as base64 on the wire).
51        #[serde(
52            serialize_with = "serialize_audio_bytes",
53            deserialize_with = "deserialize_audio_bytes"
54        )]
55        audio: Vec<u8>,
56        /// Audio format metadata for multi-format pipelines and debugging.
57        /// Skipped during serialization — the server infers format from the session config.
58        #[serde(skip)]
59        format: Option<crate::audio::AudioFormat>,
60    },
61
62    /// Commit the current audio buffer (manual mode).
63    #[serde(rename = "input_audio_buffer.commit")]
64    InputAudioBufferCommit,
65
66    /// Clear the audio input buffer.
67    #[serde(rename = "input_audio_buffer.clear")]
68    InputAudioBufferClear,
69
70    /// Send a text message or tool response.
71    #[serde(rename = "conversation.item.create")]
72    ConversationItemCreate {
73        /// The conversation item (flexible JSON for provider compatibility).
74        item: Value,
75    },
76
77    /// Trigger a response from the model.
78    #[serde(rename = "response.create")]
79    ResponseCreate {
80        /// Optional response configuration.
81        #[serde(skip_serializing_if = "Option::is_none")]
82        config: Option<Value>,
83    },
84
85    /// Cancel/interrupt the current response.
86    #[serde(rename = "response.cancel")]
87    ResponseCancel,
88
89    /// A standard message using `adk_core`'s native Role and Part types.
90    #[serde(rename = "message")]
91    Message {
92        /// Role of the message.
93        role: String,
94        /// Content parts of the message.
95        parts: Vec<adk_core::types::Part>,
96    },
97
98    /// Universal intent to update session configuration mid-flight.
99    ///
100    /// This is treated as a runner/control-plane internal intent and should not
101    /// be sent directly to providers without interception. By construction, it
102    /// is explicitly untagged from serialization to guarantee it cannot
103    /// leak onto the WebSocket wire.
104    #[serde(skip_serializing)]
105    UpdateSession {
106        /// New system instructions.
107        #[serde(skip_serializing_if = "Option::is_none")]
108        instructions: Option<String>,
109        /// New tools definition.
110        #[serde(skip_serializing_if = "Option::is_none")]
111        tools: Option<Vec<crate::config::ToolDefinition>>,
112    },
113}
114
115/// A conversation item for text or tool responses.
116#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct ConversationItem {
118    /// Unique ID for this item.
119    #[serde(skip_serializing_if = "Option::is_none")]
120    pub id: Option<String>,
121    /// Item type: "message" or "function_call_output".
122    #[serde(rename = "type")]
123    pub item_type: String,
124    /// Role: "user", "assistant", or "system".
125    #[serde(skip_serializing_if = "Option::is_none")]
126    pub role: Option<String>,
127    /// Content parts.
128    #[serde(skip_serializing_if = "Option::is_none")]
129    pub content: Option<Vec<ContentPart>>,
130    /// For tool responses: the call ID being responded to.
131    #[serde(skip_serializing_if = "Option::is_none")]
132    pub call_id: Option<String>,
133    /// For tool responses: the output value.
134    #[serde(skip_serializing_if = "Option::is_none")]
135    pub output: Option<String>,
136}
137
138/// A content part within a conversation item.
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct ContentPart {
141    /// Content type: "input_text", "input_audio", "text", "audio".
142    #[serde(rename = "type")]
143    pub content_type: String,
144    /// Text content.
145    #[serde(skip_serializing_if = "Option::is_none")]
146    pub text: Option<String>,
147    /// Base64-encoded audio content.
148    #[serde(skip_serializing_if = "Option::is_none")]
149    pub audio: Option<String>,
150    /// Transcript of audio content.
151    #[serde(skip_serializing_if = "Option::is_none")]
152    pub transcript: Option<String>,
153}
154
155impl ConversationItem {
156    /// Create a user text message item.
157    pub fn user_text(text: impl Into<String>) -> Self {
158        Self {
159            id: None,
160            item_type: "message".to_string(),
161            role: Some("user".to_string()),
162            content: Some(vec![ContentPart {
163                content_type: "input_text".to_string(),
164                text: Some(text.into()),
165                audio: None,
166                transcript: None,
167            }]),
168            call_id: None,
169            output: None,
170        }
171    }
172
173    /// Create a tool response item.
174    pub fn tool_response(call_id: impl Into<String>, output: impl Into<String>) -> Self {
175        Self {
176            id: None,
177            item_type: "function_call_output".to_string(),
178            role: None,
179            content: None,
180            call_id: Some(call_id.into()),
181            output: Some(output.into()),
182        }
183    }
184}
185
186// ── Server Events ───────────────────────────────────────────────────────
187
188/// Events received from the realtime server.
189///
190/// This is a unified event type that abstracts over provider-specific formats.
191/// Audio data is stored as raw bytes (`Vec<u8>`) — decoded from base64 at the
192/// transport boundary so consumers never need to deal with encoding.
193#[derive(Debug, Clone, Serialize, Deserialize)]
194#[serde(tag = "type")]
195pub enum ServerEvent {
196    /// Session was created/connected.
197    #[serde(rename = "session.created")]
198    SessionCreated {
199        /// Unique event ID.
200        event_id: String,
201        /// Session details.
202        session: Value,
203    },
204
205    /// Session configuration was updated.
206    #[serde(rename = "session.updated")]
207    SessionUpdated {
208        /// Unique event ID.
209        event_id: String,
210        /// Updated session details.
211        session: Value,
212    },
213
214    /// Error occurred.
215    #[serde(rename = "error")]
216    Error {
217        /// Unique event ID.
218        event_id: String,
219        /// Error details.
220        error: ErrorInfo,
221    },
222
223    /// User speech started (VAD detected).
224    #[serde(rename = "input_audio_buffer.speech_started")]
225    SpeechStarted {
226        /// Unique event ID.
227        event_id: String,
228        /// Audio start time in milliseconds.
229        audio_start_ms: u64,
230    },
231
232    /// User speech ended (VAD detected).
233    #[serde(rename = "input_audio_buffer.speech_stopped")]
234    SpeechStopped {
235        /// Unique event ID.
236        event_id: String,
237        /// Audio end time in milliseconds.
238        audio_end_ms: u64,
239    },
240
241    /// Audio input buffer was committed.
242    #[serde(rename = "input_audio_buffer.committed")]
243    AudioCommitted {
244        /// Unique event ID.
245        event_id: String,
246        /// ID of the created item.
247        item_id: String,
248    },
249
250    /// Audio input buffer was cleared.
251    #[serde(rename = "input_audio_buffer.cleared")]
252    AudioCleared {
253        /// Unique event ID.
254        event_id: String,
255    },
256
257    /// Conversation item was created.
258    #[serde(rename = "conversation.item.created")]
259    ItemCreated {
260        /// Unique event ID.
261        event_id: String,
262        /// The created item.
263        item: Value,
264    },
265
266    /// Response generation started.
267    #[serde(rename = "response.created")]
268    ResponseCreated {
269        /// Unique event ID.
270        event_id: String,
271        /// Response details.
272        response: Value,
273    },
274
275    /// Response generation completed.
276    #[serde(rename = "response.done")]
277    ResponseDone {
278        /// Unique event ID.
279        event_id: String,
280        /// Final response details.
281        response: Value,
282    },
283
284    /// Response output item added.
285    #[serde(rename = "response.output_item.added")]
286    OutputItemAdded {
287        /// Unique event ID.
288        event_id: String,
289        /// Response ID.
290        response_id: String,
291        /// Output index.
292        output_index: u32,
293        /// The output item.
294        item: Value,
295    },
296
297    /// Response output item completed.
298    #[serde(rename = "response.output_item.done")]
299    OutputItemDone {
300        /// Unique event ID.
301        event_id: String,
302        /// Response ID.
303        response_id: String,
304        /// Output index.
305        output_index: u32,
306        /// The completed item.
307        item: Value,
308    },
309
310    /// Audio delta (chunk of output audio as raw bytes).
311    #[serde(rename = "response.audio.delta")]
312    AudioDelta {
313        /// Unique event ID.
314        event_id: String,
315        /// Response ID.
316        response_id: String,
317        /// Item ID.
318        item_id: String,
319        /// Output index.
320        output_index: u32,
321        /// Content index.
322        content_index: u32,
323        /// Audio data (raw bytes, serialized as base64 on the wire).
324        #[serde(
325            serialize_with = "serialize_audio_bytes",
326            deserialize_with = "deserialize_audio_bytes"
327        )]
328        delta: Vec<u8>,
329    },
330
331    /// Audio output completed.
332    #[serde(rename = "response.audio.done")]
333    AudioDone {
334        /// Unique event ID.
335        event_id: String,
336        /// Response ID.
337        response_id: String,
338        /// Item ID.
339        item_id: String,
340        /// Output index.
341        output_index: u32,
342        /// Content index.
343        content_index: u32,
344    },
345
346    /// Text delta (chunk of output text).
347    #[serde(rename = "response.text.delta")]
348    TextDelta {
349        /// Unique event ID.
350        event_id: String,
351        /// Response ID.
352        response_id: String,
353        /// Item ID.
354        item_id: String,
355        /// Output index.
356        output_index: u32,
357        /// Content index.
358        content_index: u32,
359        /// Text content.
360        delta: String,
361    },
362
363    /// Text output completed.
364    #[serde(rename = "response.text.done")]
365    TextDone {
366        /// Unique event ID.
367        event_id: String,
368        /// Response ID.
369        response_id: String,
370        /// Item ID.
371        item_id: String,
372        /// Output index.
373        output_index: u32,
374        /// Content index.
375        content_index: u32,
376        /// Complete text.
377        text: String,
378    },
379
380    /// Audio transcript delta.
381    #[serde(rename = "response.audio_transcript.delta")]
382    TranscriptDelta {
383        /// Unique event ID.
384        event_id: String,
385        /// Response ID.
386        response_id: String,
387        /// Item ID.
388        item_id: String,
389        /// Output index.
390        output_index: u32,
391        /// Content index.
392        content_index: u32,
393        /// Transcript delta.
394        delta: String,
395    },
396
397    /// Audio transcript completed.
398    #[serde(rename = "response.audio_transcript.done")]
399    TranscriptDone {
400        /// Unique event ID.
401        event_id: String,
402        /// Response ID.
403        response_id: String,
404        /// Item ID.
405        item_id: String,
406        /// Output index.
407        output_index: u32,
408        /// Content index.
409        content_index: u32,
410        /// Complete transcript.
411        transcript: String,
412    },
413
414    /// Function call arguments delta.
415    #[serde(rename = "response.function_call_arguments.delta")]
416    FunctionCallDelta {
417        /// Unique event ID.
418        event_id: String,
419        /// Response ID.
420        response_id: String,
421        /// Item ID.
422        item_id: String,
423        /// Output index.
424        output_index: u32,
425        /// Call ID.
426        call_id: String,
427        /// Arguments delta.
428        delta: String,
429    },
430
431    /// Function call completed.
432    #[serde(rename = "response.function_call_arguments.done")]
433    FunctionCallDone {
434        /// Unique event ID.
435        event_id: String,
436        /// Response ID.
437        response_id: String,
438        /// Item ID.
439        item_id: String,
440        /// Output index.
441        output_index: u32,
442        /// Call ID.
443        call_id: String,
444        /// Function name.
445        name: String,
446        /// Complete arguments.
447        arguments: String,
448    },
449
450    /// Rate limit information.
451    #[serde(rename = "rate_limits.updated")]
452    RateLimitsUpdated {
453        /// Unique event ID.
454        event_id: String,
455        /// Rate limit details.
456        rate_limits: Vec<RateLimit>,
457    },
458
459    /// Unknown event type (for forward compatibility).
460    #[serde(other)]
461    Unknown,
462}
463
464/// Error information from the server.
465#[derive(Debug, Clone, Serialize, Deserialize)]
466pub struct ErrorInfo {
467    /// Error type/code.
468    #[serde(rename = "type")]
469    pub error_type: String,
470    /// Error code.
471    #[serde(skip_serializing_if = "Option::is_none")]
472    pub code: Option<String>,
473    /// Human-readable error message.
474    pub message: String,
475    /// Additional error parameters.
476    #[serde(skip_serializing_if = "Option::is_none")]
477    pub param: Option<String>,
478}
479
480/// Rate limit information.
481#[derive(Debug, Clone, Serialize, Deserialize)]
482pub struct RateLimit {
483    /// Limit name.
484    pub name: String,
485    /// Maximum allowed.
486    pub limit: u64,
487    /// Currently remaining.
488    pub remaining: u64,
489    /// Time until reset.
490    pub reset_seconds: f64,
491}
492
493/// A simplified tool call representation.
494#[derive(Debug, Clone, Serialize, Deserialize)]
495pub struct ToolCall {
496    /// Unique call ID (used for responses).
497    pub call_id: String,
498    /// Tool/function name.
499    pub name: String,
500    /// Arguments as JSON.
501    pub arguments: Value,
502}
503
504/// A tool response to send back to the model.
505#[derive(Debug, Clone, Serialize, Deserialize)]
506pub struct ToolResponse {
507    /// The call ID being responded to.
508    pub call_id: String,
509    /// The result/output of the tool execution.
510    pub output: Value,
511}
512
513impl ToolResponse {
514    /// Create a new tool response.
515    pub fn new(call_id: impl Into<String>, output: impl Serialize) -> Self {
516        Self {
517            call_id: call_id.into(),
518            output: serde_json::to_value(output).unwrap_or(Value::Null),
519        }
520    }
521
522    /// Create a tool response from a string output.
523    pub fn from_string(call_id: impl Into<String>, output: impl Into<String>) -> Self {
524        Self { call_id: call_id.into(), output: Value::String(output.into()) }
525    }
526}