gemini_live/types/client_message.rs
1//! Client → Server message types.
2//!
3//! The Gemini Live protocol defines **4 client message kinds**, each carrying
4//! exactly one top-level field:
5//!
6//! | Variant | Wire field | When to send |
7//! |------------------|-------------------|----------------------------------|
8//! | `Setup` | `setup` | First message only |
9//! | `ClientContent` | `clientContent` | Conversation history / turns |
10//! | `RealtimeInput` | `realtimeInput` | Streaming audio / video / text |
11//! | `ToolResponse` | `toolResponse` | Replies to server `toolCall` |
12//!
13//! [`ClientMessage`] is serialised as a serde externally-tagged enum, which
14//! naturally produces `{"setup": {...}}` etc.
15
16use serde::Serialize;
17
18use super::common::{Blob, Content, EmptyObject};
19use super::config::*;
20
21/// A message sent from client to server.
22///
23/// The protocol requires each message to carry **exactly one** top-level field.
24/// Serde's externally-tagged enum representation satisfies this constraint
25/// directly — `ClientMessage::Setup(cfg)` serialises to `{"setup": { ... }}`.
26#[derive(Debug, Clone, Serialize)]
27pub enum ClientMessage {
28 #[serde(rename = "setup")]
29 Setup(SetupConfig),
30 #[serde(rename = "clientContent")]
31 ClientContent(ClientContent),
32 #[serde(rename = "realtimeInput")]
33 RealtimeInput(RealtimeInput),
34 #[serde(rename = "toolResponse")]
35 ToolResponse(ToolResponseMessage),
36}
37
38// ── Setup ────────────────────────────────────────────────────────────────────
39
40/// The first (and only) `setup` message, configuring the session.
41///
42/// `model` is the only required field. All others have sensible server
43/// defaults when omitted.
44///
45/// This is the canonical home for setup-field semantics in the crate. Keep
46/// model-family caveats and wire-format notes on these fields rather than
47/// restating them in standalone docs.
48#[derive(Debug, Clone, Default, Serialize)]
49#[serde(rename_all = "camelCase")]
50pub struct SetupConfig {
51 /// Model resource name, e.g. `"models/gemini-3.1-flash-live-preview"`.
52 pub model: String,
53 /// Generation-time settings such as response modalities, voice, thinking,
54 /// and sampling controls.
55 #[serde(skip_serializing_if = "Option::is_none")]
56 pub generation_config: Option<GenerationConfig>,
57 /// System prompt or instruction content applied at session setup time.
58 #[serde(skip_serializing_if = "Option::is_none")]
59 pub system_instruction: Option<Content>,
60 /// Tool definitions available to the model for this session.
61 #[serde(skip_serializing_if = "Option::is_none")]
62 pub tools: Option<Vec<Tool>>,
63 /// Real-time audio/video interpretation settings including VAD behaviour.
64 #[serde(skip_serializing_if = "Option::is_none")]
65 pub realtime_input_config: Option<RealtimeInputConfig>,
66 /// Opts the session into server-issued resume handles.
67 ///
68 /// The session layer patches `handle` automatically during reconnects.
69 #[serde(skip_serializing_if = "Option::is_none")]
70 pub session_resumption: Option<SessionResumptionConfig>,
71 /// Server-managed context compression settings.
72 #[serde(skip_serializing_if = "Option::is_none")]
73 pub context_window_compression: Option<ContextWindowCompressionConfig>,
74 /// Presence-activated input speech transcription (`{}` to enable).
75 #[serde(skip_serializing_if = "Option::is_none")]
76 pub input_audio_transcription: Option<AudioTranscriptionConfig>,
77 /// Presence-activated output speech transcription (`{}` to enable).
78 #[serde(skip_serializing_if = "Option::is_none")]
79 pub output_audio_transcription: Option<AudioTranscriptionConfig>,
80 /// Proactive audio (v1alpha, Gemini 2.5 only).
81 #[serde(skip_serializing_if = "Option::is_none")]
82 pub proactivity: Option<ProactivityConfig>,
83 /// History bootstrapping (Gemini 3.1). This only affects how initial
84 /// `clientContent` may be sent before the first `realtimeInput`.
85 #[serde(skip_serializing_if = "Option::is_none")]
86 pub history_config: Option<HistoryConfig>,
87}
88
89// ── ClientContent ────────────────────────────────────────────────────────────
90
91/// Conversation history or incremental content.
92///
93/// On Gemini 2.5 this can be sent at any time during the session.
94/// On Gemini 3.1 it can only be sent as initial history (before the first
95/// `realtimeInput`), and requires `historyConfig.initialHistoryInClientContent = true`.
96#[derive(Debug, Clone, Serialize)]
97#[serde(rename_all = "camelCase")]
98pub struct ClientContent {
99 #[serde(skip_serializing_if = "Option::is_none")]
100 pub turns: Option<Vec<Content>>,
101 #[serde(skip_serializing_if = "Option::is_none")]
102 pub turn_complete: Option<bool>,
103}
104
105// ── RealtimeInput ────────────────────────────────────────────────────────────
106
107/// Streaming real-time input — audio, video, text, or VAD control signals.
108///
109/// Each message should carry only **one** of these fields.
110///
111/// # Audio format
112/// 16-bit signed little-endian PCM, recommended 16 kHz sample rate.
113/// Chunk size: 100–250 ms (3,200–8,000 bytes raw).
114///
115/// # Video format
116/// JPEG or PNG, max 1 fps, recommended < 200 KB per frame.
117#[derive(Debug, Clone, Default, Serialize)]
118#[serde(rename_all = "camelCase")]
119pub struct RealtimeInput {
120 #[serde(skip_serializing_if = "Option::is_none")]
121 pub audio: Option<Blob>,
122 #[serde(skip_serializing_if = "Option::is_none")]
123 pub video: Option<Blob>,
124 #[serde(skip_serializing_if = "Option::is_none")]
125 pub text: Option<String>,
126 /// Manual VAD: signal that user activity has started.
127 /// Requires `automaticActivityDetection.disabled = true`.
128 #[serde(skip_serializing_if = "Option::is_none")]
129 pub activity_start: Option<EmptyObject>,
130 /// Manual VAD: signal that user activity has ended.
131 #[serde(skip_serializing_if = "Option::is_none")]
132 pub activity_end: Option<EmptyObject>,
133 /// Auto VAD: notify server that the mic has been muted / stream ended.
134 #[serde(skip_serializing_if = "Option::is_none")]
135 pub audio_stream_end: Option<bool>,
136}
137
138// ── ToolResponse ─────────────────────────────────────────────────────────────
139
140/// Response to one or more server-initiated function calls.
141#[derive(Debug, Clone, Serialize)]
142#[serde(rename_all = "camelCase")]
143pub struct ToolResponseMessage {
144 pub function_responses: Vec<FunctionResponse>,
145}
146
147/// A single function call result, keyed by the server-assigned `id`.
148#[derive(Debug, Clone, Serialize)]
149#[serde(rename_all = "camelCase")]
150pub struct FunctionResponse {
151 /// Must match the `id` from the corresponding [`FunctionCallRequest`](super::server_message::FunctionCallRequest).
152 pub id: String,
153 pub name: String,
154 /// Arbitrary JSON result returned to the model.
155 ///
156 /// Keep the payload flexible: current Live API docs place some Gemini 2.5
157 /// tool-response scheduling knobs inside this JSON object rather than as a
158 /// top-level Rust field.
159 pub response: serde_json::Value,
160}