1use serde::{Deserialize, Serialize};
2
3use crate::config::{GenerationConfig, SpeechConfig, ThinkingConfig};
4use crate::content::{Blob, Content, FunctionCall, FunctionResponse};
5use crate::enums::{
6 ActivityHandling, EndSensitivity, MediaResolution, Modality, StartSensitivity,
7 TurnCompleteReason, TurnCoverage, VadSignalType,
8};
9use crate::grounding::GroundingMetadata;
10use crate::http::HttpOptions;
11use crate::response::{UrlContextMetadata, UsageMetadata};
12use crate::tool::Tool;
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16#[serde(rename_all = "camelCase")]
17pub struct LiveClientSetup {
18 #[serde(skip_serializing_if = "Option::is_none")]
19 pub model: Option<String>,
20 #[serde(skip_serializing_if = "Option::is_none")]
21 pub generation_config: Option<GenerationConfig>,
22 #[serde(skip_serializing_if = "Option::is_none")]
23 pub system_instruction: Option<Content>,
24 #[serde(skip_serializing_if = "Option::is_none")]
25 pub tools: Option<Vec<Tool>>,
26 #[serde(skip_serializing_if = "Option::is_none")]
27 pub realtime_input_config: Option<RealtimeInputConfig>,
28 #[serde(skip_serializing_if = "Option::is_none")]
29 pub session_resumption: Option<SessionResumptionConfig>,
30 #[serde(skip_serializing_if = "Option::is_none")]
31 pub context_window_compression: Option<ContextWindowCompressionConfig>,
32 #[serde(skip_serializing_if = "Option::is_none")]
33 pub input_audio_transcription: Option<AudioTranscriptionConfig>,
34 #[serde(skip_serializing_if = "Option::is_none")]
35 pub output_audio_transcription: Option<AudioTranscriptionConfig>,
36 #[serde(skip_serializing_if = "Option::is_none")]
37 pub proactivity: Option<ProactivityConfig>,
38 #[serde(skip_serializing_if = "Option::is_none")]
39 pub explicit_vad_signal: Option<bool>,
40}
41
/// Alias for [`LiveClientSetup`]; both names refer to the same type.
pub type LiveSetup = LiveClientSetup;
44
45#[derive(Debug, Clone, Serialize, Deserialize)]
47#[serde(rename_all = "camelCase")]
48pub struct LiveServerSetupComplete {
49 #[serde(skip_serializing_if = "Option::is_none")]
50 pub session_id: Option<String>,
51}
52
53#[derive(Debug, Clone, Serialize, Deserialize)]
55#[serde(rename_all = "camelCase")]
56pub struct Transcription {
57 #[serde(skip_serializing_if = "Option::is_none")]
58 pub text: Option<String>,
59 #[serde(skip_serializing_if = "Option::is_none")]
60 pub finished: Option<bool>,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
65#[serde(rename_all = "camelCase")]
66pub struct LiveServerContent {
67 #[serde(skip_serializing_if = "Option::is_none")]
68 pub model_turn: Option<Content>,
69 #[serde(skip_serializing_if = "Option::is_none")]
70 pub turn_complete: Option<bool>,
71 #[serde(skip_serializing_if = "Option::is_none")]
72 pub interrupted: Option<bool>,
73 #[serde(skip_serializing_if = "Option::is_none")]
74 pub grounding_metadata: Option<GroundingMetadata>,
75 #[serde(skip_serializing_if = "Option::is_none")]
76 pub generation_complete: Option<bool>,
77 #[serde(skip_serializing_if = "Option::is_none")]
78 pub input_transcription: Option<Transcription>,
79 #[serde(skip_serializing_if = "Option::is_none")]
80 pub output_transcription: Option<Transcription>,
81 #[serde(skip_serializing_if = "Option::is_none")]
82 pub url_context_metadata: Option<UrlContextMetadata>,
83 #[serde(skip_serializing_if = "Option::is_none")]
84 pub turn_complete_reason: Option<TurnCompleteReason>,
85 #[serde(skip_serializing_if = "Option::is_none")]
86 pub waiting_for_input: Option<bool>,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
91#[serde(rename_all = "camelCase")]
92pub struct LiveServerToolCall {
93 #[serde(skip_serializing_if = "Option::is_none")]
94 pub function_calls: Option<Vec<FunctionCall>>,
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
99#[serde(rename_all = "camelCase")]
100pub struct LiveServerToolCallCancellation {
101 #[serde(skip_serializing_if = "Option::is_none")]
102 pub ids: Option<Vec<String>>,
103}
104
105#[derive(Debug, Clone, Serialize, Deserialize)]
107#[serde(rename_all = "camelCase")]
108pub struct LiveServerGoAway {
109 #[serde(skip_serializing_if = "Option::is_none")]
110 pub time_left: Option<String>,
111}
112
113#[derive(Debug, Clone, Serialize, Deserialize)]
115#[serde(rename_all = "camelCase")]
116pub struct LiveServerSessionResumptionUpdate {
117 #[serde(skip_serializing_if = "Option::is_none")]
118 pub new_handle: Option<String>,
119 #[serde(skip_serializing_if = "Option::is_none")]
120 pub resumable: Option<bool>,
121 #[serde(skip_serializing_if = "Option::is_none")]
122 pub last_consumed_client_message_index: Option<String>,
123}
124
125#[derive(Debug, Clone, Serialize, Deserialize)]
127#[serde(rename_all = "camelCase")]
128pub struct VoiceActivityDetectionSignal {
129 #[serde(skip_serializing_if = "Option::is_none")]
130 pub vad_signal_type: Option<VadSignalType>,
131}
132
133#[derive(Debug, Clone, Serialize, Deserialize)]
135#[serde(rename_all = "camelCase")]
136pub struct LiveServerMessage {
137 #[serde(skip_serializing_if = "Option::is_none")]
138 pub setup_complete: Option<LiveServerSetupComplete>,
139 #[serde(skip_serializing_if = "Option::is_none")]
140 pub server_content: Option<LiveServerContent>,
141 #[serde(skip_serializing_if = "Option::is_none")]
142 pub tool_call: Option<LiveServerToolCall>,
143 #[serde(skip_serializing_if = "Option::is_none")]
144 pub tool_call_cancellation: Option<LiveServerToolCallCancellation>,
145 #[serde(skip_serializing_if = "Option::is_none")]
146 pub usage_metadata: Option<UsageMetadata>,
147 #[serde(skip_serializing_if = "Option::is_none")]
148 pub go_away: Option<LiveServerGoAway>,
149 #[serde(skip_serializing_if = "Option::is_none")]
150 pub session_resumption_update: Option<LiveServerSessionResumptionUpdate>,
151 #[serde(skip_serializing_if = "Option::is_none")]
152 pub voice_activity_detection_signal: Option<VoiceActivityDetectionSignal>,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
157#[serde(rename_all = "camelCase")]
158pub struct AutomaticActivityDetection {
159 #[serde(skip_serializing_if = "Option::is_none")]
160 pub disabled: Option<bool>,
161 #[serde(skip_serializing_if = "Option::is_none")]
162 pub start_of_speech_sensitivity: Option<StartSensitivity>,
163 #[serde(skip_serializing_if = "Option::is_none")]
164 pub end_of_speech_sensitivity: Option<EndSensitivity>,
165 #[serde(skip_serializing_if = "Option::is_none")]
166 pub prefix_padding_ms: Option<i32>,
167 #[serde(skip_serializing_if = "Option::is_none")]
168 pub silence_duration_ms: Option<i32>,
169}
170
171#[derive(Debug, Clone, Serialize, Deserialize)]
173#[serde(rename_all = "camelCase")]
174pub struct RealtimeInputConfig {
175 #[serde(skip_serializing_if = "Option::is_none")]
176 pub automatic_activity_detection: Option<AutomaticActivityDetection>,
177 #[serde(skip_serializing_if = "Option::is_none")]
178 pub activity_handling: Option<ActivityHandling>,
179 #[serde(skip_serializing_if = "Option::is_none")]
180 pub turn_coverage: Option<TurnCoverage>,
181}
182
183#[derive(Debug, Clone, Serialize, Deserialize)]
185#[serde(rename_all = "camelCase")]
186pub struct SessionResumptionConfig {
187 #[serde(skip_serializing_if = "Option::is_none")]
188 pub handle: Option<String>,
189 #[serde(skip_serializing_if = "Option::is_none")]
190 pub transparent: Option<bool>,
191}
192
193#[derive(Debug, Clone, Serialize, Deserialize)]
195#[serde(rename_all = "camelCase")]
196pub struct ContextWindowCompressionConfig {
197 #[serde(skip_serializing_if = "Option::is_none")]
198 pub trigger_tokens: Option<String>,
199 #[serde(skip_serializing_if = "Option::is_none")]
200 pub sliding_window: Option<SlidingWindow>,
201}
202
203#[derive(Debug, Clone, Serialize, Deserialize)]
205#[serde(rename_all = "camelCase")]
206pub struct SlidingWindow {
207 #[serde(skip_serializing_if = "Option::is_none")]
208 pub target_tokens: Option<String>,
209}
210
/// Audio transcription settings; used for the `input_audio_transcription` and
/// `output_audio_transcription` fields of [`LiveClientSetup`] and
/// [`LiveConnectConfig`].
///
/// The struct has no fields; it is declared with braces rather than as a unit
/// struct.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct AudioTranscriptionConfig {}
215
216#[derive(Debug, Clone, Serialize, Deserialize)]
218#[serde(rename_all = "camelCase")]
219pub struct ProactivityConfig {
220 #[serde(skip_serializing_if = "Option::is_none")]
221 pub proactive_audio: Option<bool>,
222}
223
224#[derive(Debug, Clone, Serialize, Deserialize)]
226#[serde(rename_all = "camelCase")]
227pub struct LiveClientContent {
228 #[serde(skip_serializing_if = "Option::is_none")]
229 pub turns: Option<Vec<Content>>,
230 #[serde(skip_serializing_if = "Option::is_none")]
231 pub turn_complete: Option<bool>,
232}
233
/// Field-less marker used for the `activity_start` field of
/// [`LiveClientRealtimeInput`] and [`LiveSendRealtimeInputParameters`].
///
/// Declared with braces rather than as a unit struct.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct ActivityStart {}
238
/// Field-less marker used for the `activity_end` field of
/// [`LiveClientRealtimeInput`] and [`LiveSendRealtimeInputParameters`].
///
/// Declared with braces rather than as a unit struct.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct ActivityEnd {}
243
244#[derive(Debug, Clone, Serialize, Deserialize)]
246#[serde(rename_all = "camelCase")]
247pub struct LiveClientRealtimeInput {
248 #[serde(skip_serializing_if = "Option::is_none")]
249 pub media_chunks: Option<Vec<Blob>>,
250 #[serde(skip_serializing_if = "Option::is_none")]
251 pub audio: Option<Blob>,
252 #[serde(skip_serializing_if = "Option::is_none")]
253 pub audio_stream_end: Option<bool>,
254 #[serde(skip_serializing_if = "Option::is_none")]
255 pub video: Option<Blob>,
256 #[serde(skip_serializing_if = "Option::is_none")]
257 pub text: Option<String>,
258 #[serde(skip_serializing_if = "Option::is_none")]
259 pub activity_start: Option<ActivityStart>,
260 #[serde(skip_serializing_if = "Option::is_none")]
261 pub activity_end: Option<ActivityEnd>,
262}
263
264#[derive(Debug, Clone, Serialize, Deserialize)]
266#[serde(rename_all = "camelCase")]
267pub struct LiveClientToolResponse {
268 #[serde(skip_serializing_if = "Option::is_none")]
269 pub function_responses: Option<Vec<FunctionResponse>>,
270}
271
272#[derive(Debug, Clone, Serialize, Deserialize)]
274#[serde(rename_all = "camelCase")]
275pub struct LiveSendRealtimeInputParameters {
276 #[serde(skip_serializing_if = "Option::is_none")]
277 pub media: Option<Blob>,
278 #[serde(skip_serializing_if = "Option::is_none")]
279 pub audio: Option<Blob>,
280 #[serde(skip_serializing_if = "Option::is_none")]
281 pub audio_stream_end: Option<bool>,
282 #[serde(skip_serializing_if = "Option::is_none")]
283 pub video: Option<Blob>,
284 #[serde(skip_serializing_if = "Option::is_none")]
285 pub text: Option<String>,
286 #[serde(skip_serializing_if = "Option::is_none")]
287 pub activity_start: Option<ActivityStart>,
288 #[serde(skip_serializing_if = "Option::is_none")]
289 pub activity_end: Option<ActivityEnd>,
290}
291
292#[derive(Debug, Clone, Serialize, Deserialize)]
294#[serde(rename_all = "camelCase")]
295pub struct LiveClientMessage {
296 #[serde(skip_serializing_if = "Option::is_none")]
297 pub setup: Option<LiveClientSetup>,
298 #[serde(skip_serializing_if = "Option::is_none")]
299 pub client_content: Option<LiveClientContent>,
300 #[serde(skip_serializing_if = "Option::is_none")]
301 pub realtime_input: Option<LiveClientRealtimeInput>,
302 #[serde(skip_serializing_if = "Option::is_none")]
303 pub tool_response: Option<LiveClientToolResponse>,
304}
305
/// Configuration bundle for a live connection.
///
/// Every field is optional and the type derives `Default`, so callers can set a
/// few fields and finish with `..Default::default()`. Keys are serialized in
/// camelCase and `None` fields are omitted.
/// NOTE(review): several fields here (`temperature`, `top_p`, `top_k`, ...) sit
/// next to `generation_config`; how the two are combined or which takes
/// precedence is not visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct LiveConnectConfig {
    /// HTTP options (see `crate::http::HttpOptions`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub http_options: Option<HttpOptions>,
    /// Generation settings, serialized as `generationConfig`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub generation_config: Option<GenerationConfig>,
    /// Response modalities, serialized as `responseModalities`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_modalities: Option<Vec<Modality>>,
    /// Sampling temperature.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Top-p value, serialized as `topP`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
    /// Top-k value, serialized as `topK`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<i32>,
    /// Maximum output token count, serialized as `maxOutputTokens`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<i32>,
    /// Media resolution setting, serialized as `mediaResolution`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub media_resolution: Option<MediaResolution>,
    /// Seed value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,
    /// Speech settings, serialized as `speechConfig`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub speech_config: Option<SpeechConfig>,
    /// Thinking settings, serialized as `thinkingConfig`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
    /// Affective-dialog flag, serialized as `enableAffectiveDialog`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub enable_affective_dialog: Option<bool>,
    /// System instruction content, serialized as `systemInstruction`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_instruction: Option<Content>,
    /// Tool declarations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,
    /// Session-resumption settings (see [`SessionResumptionConfig`]).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub session_resumption: Option<SessionResumptionConfig>,
    /// Input-audio transcription settings; the config type currently has no fields.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_audio_transcription: Option<AudioTranscriptionConfig>,
    /// Output-audio transcription settings; the config type currently has no fields.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_audio_transcription: Option<AudioTranscriptionConfig>,
    /// Realtime-input settings (see [`RealtimeInputConfig`]).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub realtime_input_config: Option<RealtimeInputConfig>,
    /// Context-window compression settings (see [`ContextWindowCompressionConfig`]).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context_window_compression: Option<ContextWindowCompressionConfig>,
    /// Proactivity settings (see [`ProactivityConfig`]).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub proactivity: Option<ProactivityConfig>,
    /// Explicit VAD signal flag, serialized as `explicitVadSignal`.
    /// NOTE(review): presumably asks the server to emit
    /// [`VoiceActivityDetectionSignal`] messages — confirm against the API reference.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub explicit_vad_signal: Option<bool>,
}
353
354#[derive(Debug, Clone, Serialize, Deserialize)]
356#[serde(rename_all = "camelCase")]
357pub struct LiveSendClientContentParameters {
358 #[serde(skip_serializing_if = "Option::is_none")]
359 pub turns: Option<Vec<Content>>,
360 #[serde(skip_serializing_if = "Option::is_none")]
361 pub turn_complete: Option<bool>,
362}
363
364#[derive(Debug, Clone, Serialize, Deserialize)]
366#[serde(rename_all = "camelCase")]
367pub struct LiveSendToolResponseParameters {
368 #[serde(skip_serializing_if = "Option::is_none")]
369 pub function_responses: Option<Vec<FunctionResponse>>,
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use crate::content::{Content, Part, Role};
    use crate::enums::{ActivityHandling, StartSensitivity, VadSignalType};

    /// Serializes a populated `LiveServerMessage`, parses it back, and checks
    /// that populated fields survive the trip, that keys use camelCase, and
    /// that slots left as `None` never reach the JSON text.
    #[test]
    fn live_server_message_roundtrip() {
        let message = LiveServerMessage {
            setup_complete: Some(LiveServerSetupComplete {
                session_id: Some("session-123".to_string()),
            }),
            server_content: Some(LiveServerContent {
                model_turn: Some(Content::from_parts(vec![Part::text("hi")], Role::Model)),
                turn_complete: Some(true),
                interrupted: None,
                grounding_metadata: None,
                generation_complete: Some(true),
                input_transcription: Some(Transcription {
                    text: Some("hello".to_string()),
                    finished: Some(true),
                }),
                output_transcription: None,
                url_context_metadata: None,
                turn_complete_reason: Some(TurnCompleteReason::NeedMoreInput),
                waiting_for_input: Some(true),
            }),
            tool_call: None,
            tool_call_cancellation: None,
            usage_metadata: None,
            go_away: Some(LiveServerGoAway {
                time_left: Some("5s".to_string()),
            }),
            session_resumption_update: Some(LiveServerSessionResumptionUpdate {
                new_handle: Some("handle-1".to_string()),
                resumable: Some(true),
                last_consumed_client_message_index: Some("42".to_string()),
            }),
            voice_activity_detection_signal: Some(VoiceActivityDetectionSignal {
                vad_signal_type: Some(VadSignalType::VadSignalTypeSos),
            }),
        };

        let json = serde_json::to_string(&message).unwrap();

        // Populated slots are written under camelCase keys.
        assert!(json.contains("\"setupComplete\""));
        assert!(json.contains("\"sessionResumptionUpdate\""));
        assert!(json.contains("\"lastConsumedClientMessageIndex\""));
        assert!(json.contains("\"voiceActivityDetectionSignal\""));

        // Slots left as `None` are skipped rather than written out.
        assert!(!json.contains("toolCall"));
        assert!(!json.contains("usageMetadata"));
        assert!(!json.contains("interrupted"));
        assert!(!json.contains("outputTranscription"));

        let decoded: LiveServerMessage = serde_json::from_str(&json).unwrap();
        let text = decoded
            .server_content
            .as_ref()
            .and_then(|content| content.model_turn.as_ref())
            .and_then(|content| content.first_text());
        assert_eq!(text, Some("hi"));

        assert_eq!(
            decoded
                .setup_complete
                .as_ref()
                .and_then(|setup| setup.session_id.as_deref()),
            Some("session-123")
        );

        let content = decoded
            .server_content
            .as_ref()
            .expect("server_content was populated before serialization");
        assert_eq!(content.turn_complete, Some(true));
        assert_eq!(content.generation_complete, Some(true));
        assert_eq!(content.waiting_for_input, Some(true));
        assert_eq!(content.interrupted, None);
        assert!(content.output_transcription.is_none());
        let transcription = content
            .input_transcription
            .as_ref()
            .expect("input_transcription was populated before serialization");
        assert_eq!(transcription.text.as_deref(), Some("hello"));
        assert_eq!(transcription.finished, Some(true));

        assert_eq!(
            decoded
                .go_away
                .as_ref()
                .and_then(|go_away| go_away.time_left.as_deref()),
            Some("5s")
        );

        let update = decoded
            .session_resumption_update
            .as_ref()
            .expect("session_resumption_update was populated before serialization");
        assert_eq!(update.new_handle.as_deref(), Some("handle-1"));
        assert_eq!(update.resumable, Some(true));
        assert_eq!(
            update.last_consumed_client_message_index.as_deref(),
            Some("42")
        );

        assert!(decoded.tool_call.is_none());
        assert!(decoded.tool_call_cancellation.is_none());
        assert!(decoded.usage_metadata.is_none());
    }

    /// Checks the JSON produced for a partially populated `RealtimeInputConfig`:
    /// set fields appear under camelCase keys, unset fields are absent.
    #[test]
    fn realtime_input_config_serializes() {
        let config = RealtimeInputConfig {
            automatic_activity_detection: Some(AutomaticActivityDetection {
                disabled: Some(false),
                start_of_speech_sensitivity: Some(StartSensitivity::StartSensitivityHigh),
                end_of_speech_sensitivity: None,
                prefix_padding_ms: Some(120),
                silence_duration_ms: Some(300),
            }),
            activity_handling: Some(ActivityHandling::StartOfActivityInterrupts),
            turn_coverage: None,
        };

        let json = serde_json::to_string(&config).unwrap();
        assert!(json.contains("automaticActivityDetection"));
        assert!(json.contains("startOfSpeechSensitivity"));
        assert!(json.contains("activityHandling"));

        // `serde_json::to_string` emits compact output, so key and value are adjacent.
        assert!(json.contains("\"disabled\":false"));
        assert!(json.contains("\"prefixPaddingMs\":120"));
        assert!(json.contains("\"silenceDurationMs\":300"));

        // Fields left as `None` must not appear at all.
        assert!(!json.contains("endOfSpeechSensitivity"));
        assert!(!json.contains("turnCoverage"));
    }
}