Skip to main content

rust_genai_types/
live_types.rs

1use serde::{Deserialize, Serialize};
2
3use crate::config::{GenerationConfig, SpeechConfig, ThinkingConfig};
4use crate::content::{Blob, Content, FunctionCall, FunctionResponse};
5use crate::enums::{
6    ActivityHandling, EndSensitivity, MediaResolution, Modality, StartSensitivity,
7    TurnCompleteReason, TurnCoverage, VadSignalType,
8};
9use crate::grounding::GroundingMetadata;
10use crate::http::HttpOptions;
11use crate::response::{UrlContextMetadata, UsageMetadata};
12use crate::tool::Tool;
13
14/// Live 客户端初始化设置。
15#[derive(Debug, Clone, Serialize, Deserialize)]
16#[serde(rename_all = "camelCase")]
17pub struct LiveClientSetup {
18    #[serde(skip_serializing_if = "Option::is_none")]
19    pub model: Option<String>,
20    #[serde(skip_serializing_if = "Option::is_none")]
21    pub generation_config: Option<GenerationConfig>,
22    #[serde(skip_serializing_if = "Option::is_none")]
23    pub system_instruction: Option<Content>,
24    #[serde(skip_serializing_if = "Option::is_none")]
25    pub tools: Option<Vec<Tool>>,
26    #[serde(skip_serializing_if = "Option::is_none")]
27    pub realtime_input_config: Option<RealtimeInputConfig>,
28    #[serde(skip_serializing_if = "Option::is_none")]
29    pub session_resumption: Option<SessionResumptionConfig>,
30    #[serde(skip_serializing_if = "Option::is_none")]
31    pub context_window_compression: Option<ContextWindowCompressionConfig>,
32    #[serde(skip_serializing_if = "Option::is_none")]
33    pub input_audio_transcription: Option<AudioTranscriptionConfig>,
34    #[serde(skip_serializing_if = "Option::is_none")]
35    pub output_audio_transcription: Option<AudioTranscriptionConfig>,
36    #[serde(skip_serializing_if = "Option::is_none")]
37    pub proactivity: Option<ProactivityConfig>,
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub explicit_vad_signal: Option<bool>,
40}
41
/// Live session setup, kept as an alias for compatibility with the old name.
pub type LiveSetup = LiveClientSetup;
44
45/// Sent in response to a `LiveGenerateContentSetup` message from the client.
46#[derive(Debug, Clone, Serialize, Deserialize)]
47#[serde(rename_all = "camelCase")]
48pub struct LiveServerSetupComplete {
49    #[serde(skip_serializing_if = "Option::is_none")]
50    pub session_id: Option<String>,
51}
52
53/// Audio transcription in server content.
54#[derive(Debug, Clone, Serialize, Deserialize)]
55#[serde(rename_all = "camelCase")]
56pub struct Transcription {
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub text: Option<String>,
59    #[serde(skip_serializing_if = "Option::is_none")]
60    pub finished: Option<bool>,
61}
62
63/// Incremental server update generated by the model in response to client messages.
64#[derive(Debug, Clone, Serialize, Deserialize)]
65#[serde(rename_all = "camelCase")]
66pub struct LiveServerContent {
67    #[serde(skip_serializing_if = "Option::is_none")]
68    pub model_turn: Option<Content>,
69    #[serde(skip_serializing_if = "Option::is_none")]
70    pub turn_complete: Option<bool>,
71    #[serde(skip_serializing_if = "Option::is_none")]
72    pub interrupted: Option<bool>,
73    #[serde(skip_serializing_if = "Option::is_none")]
74    pub grounding_metadata: Option<GroundingMetadata>,
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub generation_complete: Option<bool>,
77    #[serde(skip_serializing_if = "Option::is_none")]
78    pub input_transcription: Option<Transcription>,
79    #[serde(skip_serializing_if = "Option::is_none")]
80    pub output_transcription: Option<Transcription>,
81    #[serde(skip_serializing_if = "Option::is_none")]
82    pub url_context_metadata: Option<UrlContextMetadata>,
83    #[serde(skip_serializing_if = "Option::is_none")]
84    pub turn_complete_reason: Option<TurnCompleteReason>,
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub waiting_for_input: Option<bool>,
87}
88
89/// Request for the client to execute function calls.
90#[derive(Debug, Clone, Serialize, Deserialize)]
91#[serde(rename_all = "camelCase")]
92pub struct LiveServerToolCall {
93    #[serde(skip_serializing_if = "Option::is_none")]
94    pub function_calls: Option<Vec<FunctionCall>>,
95}
96
97/// Notification that previously issued tool calls should be cancelled.
98#[derive(Debug, Clone, Serialize, Deserialize)]
99#[serde(rename_all = "camelCase")]
100pub struct LiveServerToolCallCancellation {
101    #[serde(skip_serializing_if = "Option::is_none")]
102    pub ids: Option<Vec<String>>,
103}
104
105/// Server will not be able to service client soon.
106#[derive(Debug, Clone, Serialize, Deserialize)]
107#[serde(rename_all = "camelCase")]
108pub struct LiveServerGoAway {
109    #[serde(skip_serializing_if = "Option::is_none")]
110    pub time_left: Option<String>,
111}
112
113/// Update of the session resumption state.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115#[serde(rename_all = "camelCase")]
116pub struct LiveServerSessionResumptionUpdate {
117    #[serde(skip_serializing_if = "Option::is_none")]
118    pub new_handle: Option<String>,
119    #[serde(skip_serializing_if = "Option::is_none")]
120    pub resumable: Option<bool>,
121    #[serde(skip_serializing_if = "Option::is_none")]
122    pub last_consumed_client_message_index: Option<String>,
123}
124
125/// Voice activity detection signal.
126#[derive(Debug, Clone, Serialize, Deserialize)]
127#[serde(rename_all = "camelCase")]
128pub struct VoiceActivityDetectionSignal {
129    #[serde(skip_serializing_if = "Option::is_none")]
130    pub vad_signal_type: Option<VadSignalType>,
131}
132
133/// Response message for API call.
134#[derive(Debug, Clone, Serialize, Deserialize)]
135#[serde(rename_all = "camelCase")]
136pub struct LiveServerMessage {
137    #[serde(skip_serializing_if = "Option::is_none")]
138    pub setup_complete: Option<LiveServerSetupComplete>,
139    #[serde(skip_serializing_if = "Option::is_none")]
140    pub server_content: Option<LiveServerContent>,
141    #[serde(skip_serializing_if = "Option::is_none")]
142    pub tool_call: Option<LiveServerToolCall>,
143    #[serde(skip_serializing_if = "Option::is_none")]
144    pub tool_call_cancellation: Option<LiveServerToolCallCancellation>,
145    #[serde(skip_serializing_if = "Option::is_none")]
146    pub usage_metadata: Option<UsageMetadata>,
147    #[serde(skip_serializing_if = "Option::is_none")]
148    pub go_away: Option<LiveServerGoAway>,
149    #[serde(skip_serializing_if = "Option::is_none")]
150    pub session_resumption_update: Option<LiveServerSessionResumptionUpdate>,
151    #[serde(skip_serializing_if = "Option::is_none")]
152    pub voice_activity_detection_signal: Option<VoiceActivityDetectionSignal>,
153}
154
155/// Configures automatic detection of activity.
156#[derive(Debug, Clone, Serialize, Deserialize)]
157#[serde(rename_all = "camelCase")]
158pub struct AutomaticActivityDetection {
159    #[serde(skip_serializing_if = "Option::is_none")]
160    pub disabled: Option<bool>,
161    #[serde(skip_serializing_if = "Option::is_none")]
162    pub start_of_speech_sensitivity: Option<StartSensitivity>,
163    #[serde(skip_serializing_if = "Option::is_none")]
164    pub end_of_speech_sensitivity: Option<EndSensitivity>,
165    #[serde(skip_serializing_if = "Option::is_none")]
166    pub prefix_padding_ms: Option<i32>,
167    #[serde(skip_serializing_if = "Option::is_none")]
168    pub silence_duration_ms: Option<i32>,
169}
170
171/// Realtime input config.
172#[derive(Debug, Clone, Serialize, Deserialize)]
173#[serde(rename_all = "camelCase")]
174pub struct RealtimeInputConfig {
175    #[serde(skip_serializing_if = "Option::is_none")]
176    pub automatic_activity_detection: Option<AutomaticActivityDetection>,
177    #[serde(skip_serializing_if = "Option::is_none")]
178    pub activity_handling: Option<ActivityHandling>,
179    #[serde(skip_serializing_if = "Option::is_none")]
180    pub turn_coverage: Option<TurnCoverage>,
181}
182
183/// Configuration of session resumption mechanism.
184#[derive(Debug, Clone, Serialize, Deserialize)]
185#[serde(rename_all = "camelCase")]
186pub struct SessionResumptionConfig {
187    #[serde(skip_serializing_if = "Option::is_none")]
188    pub handle: Option<String>,
189    #[serde(skip_serializing_if = "Option::is_none")]
190    pub transparent: Option<bool>,
191}
192
193/// Context window compression config.
194#[derive(Debug, Clone, Serialize, Deserialize)]
195#[serde(rename_all = "camelCase")]
196pub struct ContextWindowCompressionConfig {
197    #[serde(skip_serializing_if = "Option::is_none")]
198    pub trigger_tokens: Option<String>,
199    #[serde(skip_serializing_if = "Option::is_none")]
200    pub sliding_window: Option<SlidingWindow>,
201}
202
203/// Sliding window config.
204#[derive(Debug, Clone, Serialize, Deserialize)]
205#[serde(rename_all = "camelCase")]
206pub struct SlidingWindow {
207    #[serde(skip_serializing_if = "Option::is_none")]
208    pub target_tokens: Option<String>,
209}
210
211/// The audio transcription configuration in setup.
212#[derive(Debug, Clone, Serialize, Deserialize, Default)]
213#[serde(rename_all = "camelCase")]
214pub struct AudioTranscriptionConfig {}
215
216/// Proactivity config.
217#[derive(Debug, Clone, Serialize, Deserialize)]
218#[serde(rename_all = "camelCase")]
219pub struct ProactivityConfig {
220    #[serde(skip_serializing_if = "Option::is_none")]
221    pub proactive_audio: Option<bool>,
222}
223
224/// Incremental update of the current conversation delivered from the client.
225#[derive(Debug, Clone, Serialize, Deserialize)]
226#[serde(rename_all = "camelCase")]
227pub struct LiveClientContent {
228    #[serde(skip_serializing_if = "Option::is_none")]
229    pub turns: Option<Vec<Content>>,
230    #[serde(skip_serializing_if = "Option::is_none")]
231    pub turn_complete: Option<bool>,
232}
233
234/// Marks the start of user activity.
235#[derive(Debug, Clone, Serialize, Deserialize, Default)]
236#[serde(rename_all = "camelCase")]
237pub struct ActivityStart {}
238
239/// Marks the end of user activity.
240#[derive(Debug, Clone, Serialize, Deserialize, Default)]
241#[serde(rename_all = "camelCase")]
242pub struct ActivityEnd {}
243
244/// User input that is sent in real time.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246#[serde(rename_all = "camelCase")]
247pub struct LiveClientRealtimeInput {
248    #[serde(skip_serializing_if = "Option::is_none")]
249    pub media_chunks: Option<Vec<Blob>>,
250    #[serde(skip_serializing_if = "Option::is_none")]
251    pub audio: Option<Blob>,
252    #[serde(skip_serializing_if = "Option::is_none")]
253    pub audio_stream_end: Option<bool>,
254    #[serde(skip_serializing_if = "Option::is_none")]
255    pub video: Option<Blob>,
256    #[serde(skip_serializing_if = "Option::is_none")]
257    pub text: Option<String>,
258    #[serde(skip_serializing_if = "Option::is_none")]
259    pub activity_start: Option<ActivityStart>,
260    #[serde(skip_serializing_if = "Option::is_none")]
261    pub activity_end: Option<ActivityEnd>,
262}
263
264/// Client generated response to a `ToolCall` received from the server.
265#[derive(Debug, Clone, Serialize, Deserialize)]
266#[serde(rename_all = "camelCase")]
267pub struct LiveClientToolResponse {
268    #[serde(skip_serializing_if = "Option::is_none")]
269    pub function_responses: Option<Vec<FunctionResponse>>,
270}
271
272/// Parameters for sending realtime input to the live API.
273#[derive(Debug, Clone, Serialize, Deserialize)]
274#[serde(rename_all = "camelCase")]
275pub struct LiveSendRealtimeInputParameters {
276    #[serde(skip_serializing_if = "Option::is_none")]
277    pub media: Option<Blob>,
278    #[serde(skip_serializing_if = "Option::is_none")]
279    pub audio: Option<Blob>,
280    #[serde(skip_serializing_if = "Option::is_none")]
281    pub audio_stream_end: Option<bool>,
282    #[serde(skip_serializing_if = "Option::is_none")]
283    pub video: Option<Blob>,
284    #[serde(skip_serializing_if = "Option::is_none")]
285    pub text: Option<String>,
286    #[serde(skip_serializing_if = "Option::is_none")]
287    pub activity_start: Option<ActivityStart>,
288    #[serde(skip_serializing_if = "Option::is_none")]
289    pub activity_end: Option<ActivityEnd>,
290}
291
292/// Messages sent by the client in the API call.
293#[derive(Debug, Clone, Serialize, Deserialize)]
294#[serde(rename_all = "camelCase")]
295pub struct LiveClientMessage {
296    #[serde(skip_serializing_if = "Option::is_none")]
297    pub setup: Option<LiveClientSetup>,
298    #[serde(skip_serializing_if = "Option::is_none")]
299    pub client_content: Option<LiveClientContent>,
300    #[serde(skip_serializing_if = "Option::is_none")]
301    pub realtime_input: Option<LiveClientRealtimeInput>,
302    #[serde(skip_serializing_if = "Option::is_none")]
303    pub tool_response: Option<LiveClientToolResponse>,
304}
305
306/// Session config for the API connection.
307#[derive(Debug, Clone, Serialize, Deserialize, Default)]
308#[serde(rename_all = "camelCase")]
309pub struct LiveConnectConfig {
310    #[serde(skip_serializing_if = "Option::is_none")]
311    pub http_options: Option<HttpOptions>,
312    #[serde(skip_serializing_if = "Option::is_none")]
313    pub generation_config: Option<GenerationConfig>,
314    #[serde(skip_serializing_if = "Option::is_none")]
315    pub response_modalities: Option<Vec<Modality>>,
316    #[serde(skip_serializing_if = "Option::is_none")]
317    pub temperature: Option<f32>,
318    #[serde(skip_serializing_if = "Option::is_none")]
319    pub top_p: Option<f32>,
320    #[serde(skip_serializing_if = "Option::is_none")]
321    pub top_k: Option<i32>,
322    #[serde(skip_serializing_if = "Option::is_none")]
323    pub max_output_tokens: Option<i32>,
324    #[serde(skip_serializing_if = "Option::is_none")]
325    pub media_resolution: Option<MediaResolution>,
326    #[serde(skip_serializing_if = "Option::is_none")]
327    pub seed: Option<i32>,
328    #[serde(skip_serializing_if = "Option::is_none")]
329    pub speech_config: Option<SpeechConfig>,
330    #[serde(skip_serializing_if = "Option::is_none")]
331    pub thinking_config: Option<ThinkingConfig>,
332    #[serde(skip_serializing_if = "Option::is_none")]
333    pub enable_affective_dialog: Option<bool>,
334    #[serde(skip_serializing_if = "Option::is_none")]
335    pub system_instruction: Option<Content>,
336    #[serde(skip_serializing_if = "Option::is_none")]
337    pub tools: Option<Vec<Tool>>,
338    #[serde(skip_serializing_if = "Option::is_none")]
339    pub session_resumption: Option<SessionResumptionConfig>,
340    #[serde(skip_serializing_if = "Option::is_none")]
341    pub input_audio_transcription: Option<AudioTranscriptionConfig>,
342    #[serde(skip_serializing_if = "Option::is_none")]
343    pub output_audio_transcription: Option<AudioTranscriptionConfig>,
344    #[serde(skip_serializing_if = "Option::is_none")]
345    pub realtime_input_config: Option<RealtimeInputConfig>,
346    #[serde(skip_serializing_if = "Option::is_none")]
347    pub context_window_compression: Option<ContextWindowCompressionConfig>,
348    #[serde(skip_serializing_if = "Option::is_none")]
349    pub proactivity: Option<ProactivityConfig>,
350    #[serde(skip_serializing_if = "Option::is_none")]
351    pub explicit_vad_signal: Option<bool>,
352}
353
354/// Parameters for sending client content to the live API.
355#[derive(Debug, Clone, Serialize, Deserialize)]
356#[serde(rename_all = "camelCase")]
357pub struct LiveSendClientContentParameters {
358    #[serde(skip_serializing_if = "Option::is_none")]
359    pub turns: Option<Vec<Content>>,
360    #[serde(skip_serializing_if = "Option::is_none")]
361    pub turn_complete: Option<bool>,
362}
363
364/// Parameters for sending tool responses to the live API.
365#[derive(Debug, Clone, Serialize, Deserialize)]
366#[serde(rename_all = "camelCase")]
367pub struct LiveSendToolResponseParameters {
368    #[serde(skip_serializing_if = "Option::is_none")]
369    pub function_responses: Option<Vec<FunctionResponse>>,
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use crate::content::{Content, Part, Role};
    use crate::enums::{ActivityHandling, StartSensitivity, VadSignalType};

    /// A populated server message must survive a JSON round trip: fields
    /// that were set come back with the same values, and fields that were
    /// `None` stay `None`.
    #[test]
    fn live_server_message_roundtrip() {
        let message = LiveServerMessage {
            setup_complete: Some(LiveServerSetupComplete {
                session_id: Some("session-123".to_string()),
            }),
            server_content: Some(LiveServerContent {
                model_turn: Some(Content::from_parts(vec![Part::text("hi")], Role::Model)),
                turn_complete: Some(true),
                interrupted: None,
                grounding_metadata: None,
                generation_complete: Some(true),
                input_transcription: Some(Transcription {
                    text: Some("hello".to_string()),
                    finished: Some(true),
                }),
                output_transcription: None,
                url_context_metadata: None,
                turn_complete_reason: Some(TurnCompleteReason::NeedMoreInput),
                waiting_for_input: Some(true),
            }),
            tool_call: None,
            tool_call_cancellation: None,
            usage_metadata: None,
            go_away: Some(LiveServerGoAway {
                time_left: Some("5s".to_string()),
            }),
            session_resumption_update: Some(LiveServerSessionResumptionUpdate {
                new_handle: Some("handle-1".to_string()),
                resumable: Some(true),
                last_consumed_client_message_index: Some("42".to_string()),
            }),
            voice_activity_detection_signal: Some(VoiceActivityDetectionSignal {
                vad_signal_type: Some(VadSignalType::VadSignalTypeSos),
            }),
        };

        let json = serde_json::to_string(&message).expect("message should serialize");
        let decoded: LiveServerMessage =
            serde_json::from_str(&json).expect("serialized message should deserialize");

        let text = decoded
            .server_content
            .as_ref()
            .and_then(|content| content.model_turn.as_ref())
            .and_then(|content| content.first_text());
        assert_eq!(text, Some("hi"));

        // Plain string / bool fields must come back unchanged.
        assert_eq!(
            decoded
                .setup_complete
                .as_ref()
                .and_then(|setup| setup.session_id.as_deref()),
            Some("session-123")
        );
        let transcription = decoded
            .server_content
            .as_ref()
            .and_then(|content| content.input_transcription.as_ref())
            .expect("input transcription should round-trip");
        assert_eq!(transcription.text.as_deref(), Some("hello"));
        assert_eq!(transcription.finished, Some(true));
        assert_eq!(
            decoded
                .go_away
                .as_ref()
                .and_then(|go_away| go_away.time_left.as_deref()),
            Some("5s")
        );
        let resumption = decoded
            .session_resumption_update
            .as_ref()
            .expect("session resumption update should round-trip");
        assert_eq!(resumption.new_handle.as_deref(), Some("handle-1"));
        assert_eq!(resumption.resumable, Some(true));
        assert_eq!(
            resumption.last_consumed_client_message_index.as_deref(),
            Some("42")
        );

        // Payloads that were never set must stay absent after decoding.
        assert!(decoded.tool_call.is_none());
        assert!(decoded.tool_call_cancellation.is_none());
        assert!(decoded.usage_metadata.is_none());
    }

    /// Serialization must use camelCase keys, keep the values that were set,
    /// and leave out fields that are `None`.
    #[test]
    fn realtime_input_config_serializes() {
        let config = RealtimeInputConfig {
            automatic_activity_detection: Some(AutomaticActivityDetection {
                disabled: Some(false),
                start_of_speech_sensitivity: Some(StartSensitivity::StartSensitivityHigh),
                end_of_speech_sensitivity: None,
                prefix_padding_ms: Some(120),
                silence_duration_ms: Some(300),
            }),
            activity_handling: Some(ActivityHandling::StartOfActivityInterrupts),
            turn_coverage: None,
        };

        let json = serde_json::to_string(&config).expect("config should serialize");
        assert!(json.contains("automaticActivityDetection"));
        assert!(json.contains("startOfSpeechSensitivity"));
        assert!(json.contains("activityHandling"));

        // Numeric values are written under their camelCase keys.
        assert!(json.contains("\"prefixPaddingMs\":120"));
        assert!(json.contains("\"silenceDurationMs\":300"));

        // `None` fields are skipped rather than written as `null`.
        assert!(!json.contains("endOfSpeechSensitivity"));
        assert!(!json.contains("turnCoverage"));
        assert!(!json.contains("null"));
    }
}
443}