Skip to main content

gemini_live/types/
client_message.rs

1//! Client → Server message types.
2//!
3//! The Gemini Live protocol defines **4 client message kinds**, each carrying
4//! exactly one top-level field:
5//!
6//! | Variant          | Wire field        | When to send                     |
7//! |------------------|-------------------|----------------------------------|
8//! | `Setup`          | `setup`           | First message only               |
9//! | `ClientContent`  | `clientContent`   | Conversation history / turns     |
10//! | `RealtimeInput`  | `realtimeInput`   | Streaming audio / video / text   |
11//! | `ToolResponse`   | `toolResponse`    | Replies to server `toolCall`     |
12//!
13//! [`ClientMessage`] is serialised as a serde externally-tagged enum, which
14//! naturally produces `{"setup": {...}}` etc.
15
16use serde::Serialize;
17
18use super::common::{Blob, Content, EmptyObject};
19use super::config::*;
20
21/// A message sent from client to server.
22///
23/// The protocol requires each message to carry **exactly one** top-level field.
24/// Serde's externally-tagged enum representation satisfies this constraint
25/// directly — `ClientMessage::Setup(cfg)` serialises to `{"setup": { ... }}`.
26#[derive(Debug, Clone, Serialize)]
27pub enum ClientMessage {
28    #[serde(rename = "setup")]
29    Setup(SetupConfig),
30    #[serde(rename = "clientContent")]
31    ClientContent(ClientContent),
32    #[serde(rename = "realtimeInput")]
33    RealtimeInput(RealtimeInput),
34    #[serde(rename = "toolResponse")]
35    ToolResponse(ToolResponseMessage),
36}
37
38// ── Setup ────────────────────────────────────────────────────────────────────
39
40/// The first (and only) `setup` message, configuring the session.
41///
42/// `model` is the only required field.  All others have sensible server
43/// defaults when omitted.
44#[derive(Debug, Clone, Default, Serialize)]
45#[serde(rename_all = "camelCase")]
46pub struct SetupConfig {
47    /// Model resource name, e.g. `"models/gemini-3.1-flash-live-preview"`.
48    pub model: String,
49    #[serde(skip_serializing_if = "Option::is_none")]
50    pub generation_config: Option<GenerationConfig>,
51    #[serde(skip_serializing_if = "Option::is_none")]
52    pub system_instruction: Option<Content>,
53    #[serde(skip_serializing_if = "Option::is_none")]
54    pub tools: Option<Vec<Tool>>,
55    #[serde(skip_serializing_if = "Option::is_none")]
56    pub realtime_input_config: Option<RealtimeInputConfig>,
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub session_resumption: Option<SessionResumptionConfig>,
59    #[serde(skip_serializing_if = "Option::is_none")]
60    pub context_window_compression: Option<ContextWindowCompressionConfig>,
61    #[serde(skip_serializing_if = "Option::is_none")]
62    pub input_audio_transcription: Option<AudioTranscriptionConfig>,
63    #[serde(skip_serializing_if = "Option::is_none")]
64    pub output_audio_transcription: Option<AudioTranscriptionConfig>,
65    /// Proactive audio (v1alpha, Gemini 2.5 only).
66    #[serde(skip_serializing_if = "Option::is_none")]
67    pub proactivity: Option<ProactivityConfig>,
68    /// History bootstrapping (Gemini 3.1).
69    #[serde(skip_serializing_if = "Option::is_none")]
70    pub history_config: Option<HistoryConfig>,
71}
72
73// ── ClientContent ────────────────────────────────────────────────────────────
74
75/// Conversation history or incremental content.
76///
77/// On Gemini 2.5 this can be sent at any time during the session.
78/// On Gemini 3.1 it can only be sent as initial history (before the first
79/// `realtimeInput`), and requires `historyConfig.initialHistoryInClientContent = true`.
80#[derive(Debug, Clone, Serialize)]
81#[serde(rename_all = "camelCase")]
82pub struct ClientContent {
83    #[serde(skip_serializing_if = "Option::is_none")]
84    pub turns: Option<Vec<Content>>,
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub turn_complete: Option<bool>,
87}
88
89// ── RealtimeInput ────────────────────────────────────────────────────────────
90
91/// Streaming real-time input — audio, video, text, or VAD control signals.
92///
93/// Each message should carry only **one** of these fields.
94///
95/// # Audio format
96/// 16-bit signed little-endian PCM, recommended 16 kHz sample rate.
97/// Chunk size: 100–250 ms (3,200–8,000 bytes raw).
98///
99/// # Video format
100/// JPEG or PNG, max 1 fps, recommended < 200 KB per frame.
101#[derive(Debug, Clone, Serialize)]
102#[serde(rename_all = "camelCase")]
103pub struct RealtimeInput {
104    #[serde(skip_serializing_if = "Option::is_none")]
105    pub audio: Option<Blob>,
106    #[serde(skip_serializing_if = "Option::is_none")]
107    pub video: Option<Blob>,
108    #[serde(skip_serializing_if = "Option::is_none")]
109    pub text: Option<String>,
110    /// Manual VAD: signal that user activity has started.
111    /// Requires `automaticActivityDetection.disabled = true`.
112    #[serde(skip_serializing_if = "Option::is_none")]
113    pub activity_start: Option<EmptyObject>,
114    /// Manual VAD: signal that user activity has ended.
115    #[serde(skip_serializing_if = "Option::is_none")]
116    pub activity_end: Option<EmptyObject>,
117    /// Auto VAD: notify server that the mic has been muted / stream ended.
118    #[serde(skip_serializing_if = "Option::is_none")]
119    pub audio_stream_end: Option<bool>,
120}
121
122// ── ToolResponse ─────────────────────────────────────────────────────────────
123
124/// Response to one or more server-initiated function calls.
125#[derive(Debug, Clone, Serialize)]
126#[serde(rename_all = "camelCase")]
127pub struct ToolResponseMessage {
128    pub function_responses: Vec<FunctionResponse>,
129}
130
131/// A single function call result, keyed by the server-assigned `id`.
132#[derive(Debug, Clone, Serialize)]
133#[serde(rename_all = "camelCase")]
134pub struct FunctionResponse {
135    /// Must match the `id` from the corresponding [`FunctionCallRequest`](super::server_message::FunctionCallRequest).
136    pub id: String,
137    pub name: String,
138    /// Arbitrary JSON result returned to the model.
139    pub response: serde_json::Value,
140}