Skip to main content

agent_sdk_providers/impls/
gemini.rs

1//! Google Gemini API provider implementation.
2//!
3//! This module provides an implementation of `LlmProvider` for the Google Gemini
4//! API (`generativelanguage.googleapis.com`).
5
6pub(crate) mod data;
7
8use crate::attachments::validate_request_attachments;
9use crate::provider::LlmProvider;
10use crate::streaming::{StreamBox, StreamDelta, StreamErrorKind};
11use agent_sdk_foundation::llm::{ChatOutcome, ChatRequest, ChatResponse, ThinkingConfig};
12use anyhow::Result;
13use async_trait::async_trait;
14use data::{
15    ApiContent, ApiFunctionCallingConfig, ApiGenerateContentRequest, ApiGenerateContentResponse,
16    ApiGenerationConfig, ApiPart, ApiUsageMetadata, build_api_contents, build_content_blocks,
17    convert_tools_to_config, gemini_response_schema, map_finish_reason, map_thinking_config,
18};
19use reqwest::StatusCode;
20
/// Default base URL of the Gemini REST API (`v1beta` surface). Used by
/// `GeminiProvider::new` unless replaced through `with_base_url`.
const API_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";

/// Connect timeout for the HTTP client (matches Anthropic/Vertex).
const CONNECT_TIMEOUT_SECS: u64 = 30;
/// TCP keepalive interval to keep long streaming connections from dropping.
const TCP_KEEPALIVE_SECS: u64 = 30;
/// Per-request read timeout for the **non-streaming** `chat()` path. Bounds a
/// black-holed endpoint so a single turn cannot hang the agent loop forever.
/// Streaming requests intentionally have no overall timeout.
const CHAT_READ_TIMEOUT_SECS: u64 = 300;

/// Max page size the Gemini `ListModels` endpoint accepts (default is 50).
/// Sent as the `pageSize` query parameter by `list_models`.
const MODELS_PAGE_SIZE: u32 = 1000;
/// Upper bound on pages followed by `list_models`, guarding against a server
/// that never clears `nextPageToken`.
const MODELS_MAX_PAGES: usize = 100;
37
38/// Build the shared HTTP client with connect + keepalive timeouts, falling back
39/// to a default client (with a logged warning) if the builder fails.
40fn build_http_client() -> reqwest::Client {
41    reqwest::Client::builder()
42        .connect_timeout(std::time::Duration::from_secs(CONNECT_TIMEOUT_SECS))
43        .tcp_keepalive(std::time::Duration::from_secs(TCP_KEEPALIVE_SECS))
44        .build()
45        .unwrap_or_else(|error| {
46            log::warn!(
47                "failed to build Gemini HTTP client with timeouts ({error}); using default client"
48            );
49            reqwest::Client::new()
50        })
51}
52
// Gemini 3.1 series
/// Model id for Gemini 3.1 Pro Preview (used by `GeminiProvider::pro` and
/// `GeminiProvider::pro_31`).
pub const MODEL_GEMINI_31_PRO: &str = "gemini-3.1-pro-preview";
/// Model id for Gemini 3.1 Flash Lite Preview (used by
/// `GeminiProvider::flash_lite_31`).
pub const MODEL_GEMINI_31_FLASH_LITE: &str = "gemini-3.1-flash-lite-preview";

// Gemini 3 series
/// Model id for Gemini 3 Flash Preview (used by `GeminiProvider::flash`).
pub const MODEL_GEMINI_3_FLASH: &str = "gemini-3-flash-preview";

// Legacy Gemini 3.0 Pro model kept for explicit opt-in.
/// Model id for the legacy Gemini 3.0 Pro model; no factory selects it.
pub const MODEL_GEMINI_3_PRO: &str = "gemini-3.0-pro";

// Gemini 2.5 series
/// Model id for Gemini 2.5 Flash.
pub const MODEL_GEMINI_25_FLASH: &str = "gemini-2.5-flash";
/// Model id for Gemini 2.5 Pro.
pub const MODEL_GEMINI_25_PRO: &str = "gemini-2.5-pro";

// Gemini 2.0 series
/// Model id for Gemini 2.0 Flash.
pub const MODEL_GEMINI_2_FLASH: &str = "gemini-2.0-flash";
/// Model id for Gemini 2.0 Flash Lite (used by `GeminiProvider::flash_lite`).
pub const MODEL_GEMINI_2_FLASH_LITE: &str = "gemini-2.0-flash-lite";
70
/// Google Gemini LLM provider.
///
/// Bundles the HTTP client, credentials, target model and request options
/// used when talking to the Gemini API. Configure it through the `with_*`
/// builder methods.
#[derive(Clone)]
pub struct GeminiProvider {
    /// HTTP client used for every request issued by this provider.
    client: reqwest::Client,
    /// Gemini API key. When empty, no provider auth is attached to requests
    /// (see `apply_auth`).
    api_key: String,
    /// Model id interpolated into the request URL (`{base_url}/models/{model}:...`).
    model: String,
    /// API base URL; `new` initialises it to `API_BASE_URL`.
    base_url: String,
    /// Provider-level thinking configuration, set via `with_thinking`.
    thinking: Option<ThinkingConfig>,
    /// When true, send the API key via `x-goog-api-key` header instead of a
    /// query parameter. Required when routing through proxies.
    use_header_auth: bool,
    /// Extra headers applied to every request (e.g. for gateway authentication).
    extra_headers: Vec<(String, String)>,
}
85
86impl GeminiProvider {
    /// The conventional environment variable holding the Gemini API key.
    /// Read by [`try_from_env`](Self::try_from_env) (and therefore by
    /// [`from_env`](Self::from_env)).
    pub const API_KEY_ENV: &'static str = "GEMINI_API_KEY";
89
90    /// Create a new Gemini provider with the specified API key and model.
91    #[must_use]
92    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
93        Self {
94            client: build_http_client(),
95            api_key: api_key.into(),
96            model: model.into(),
97            base_url: API_BASE_URL.to_owned(),
98            thinking: None,
99            use_header_auth: true,
100            extra_headers: Vec::new(),
101        }
102    }
103
104    /// Effective output-token budget for a request.
105    ///
106    /// Mirrors the Anthropic provider: when the caller did not explicitly set
107    /// `max_tokens`, substitute the provider/model default
108    /// ([`default_max_tokens`](LlmProvider::default_max_tokens)) instead of
109    /// silently capping at `ChatRequest::DEFAULT_MAX_TOKENS`.
110    fn effective_max_tokens(&self, request: &ChatRequest) -> u32 {
111        if request.max_tokens_explicit {
112            request.max_tokens
113        } else {
114            self.default_max_tokens()
115        }
116    }
117
118    /// Create a provider using Gemini Flash, reading the API key from the
119    /// conventional [`GEMINI_API_KEY`](Self::API_KEY_ENV) environment variable.
120    ///
121    /// # Panics
122    ///
123    /// Panics if `GEMINI_API_KEY` is not set. Prefer
124    /// [`try_from_env`](Self::try_from_env) outside of examples/tests.
125    #[must_use]
126    pub fn from_env() -> Self {
127        Self::try_from_env().unwrap_or_else(|e| panic!("{e}"))
128    }
129
130    /// Create a provider using Gemini Flash, reading the API key from the
131    /// conventional [`GEMINI_API_KEY`](Self::API_KEY_ENV) environment variable.
132    ///
133    /// # Errors
134    ///
135    /// Returns an error if `GEMINI_API_KEY` is unset or not valid UTF-8.
136    pub fn try_from_env() -> Result<Self> {
137        let api_key = std::env::var(Self::API_KEY_ENV).map_err(|_| {
138            anyhow::anyhow!("environment variable `{}` is not set", Self::API_KEY_ENV)
139        })?;
140        Ok(Self::flash(api_key))
141    }
142
143    /// Create a provider using Gemini 3 Flash Preview (fast and capable, current default).
144    #[must_use]
145    pub fn flash(api_key: impl Into<String>) -> Self {
146        Self::new(api_key, MODEL_GEMINI_3_FLASH)
147    }
148
149    /// Create a provider using Gemini 3.1 Flash Lite Preview.
150    #[must_use]
151    pub fn flash_lite_31(api_key: String) -> Self {
152        Self::new(api_key, MODEL_GEMINI_31_FLASH_LITE.to_owned())
153    }
154
155    /// Create a provider using Gemini 2.0 Flash Lite (fastest, most cost-effective).
156    #[must_use]
157    pub fn flash_lite(api_key: String) -> Self {
158        Self::new(api_key, MODEL_GEMINI_2_FLASH_LITE.to_owned())
159    }
160
161    /// Create a provider using Gemini 3.1 Pro Preview.
162    #[must_use]
163    pub fn pro_31(api_key: String) -> Self {
164        Self::new(api_key, MODEL_GEMINI_31_PRO.to_owned())
165    }
166
167    /// Create a provider using Gemini 3.1 Pro Preview (current recommended pro model).
168    #[must_use]
169    pub fn pro(api_key: String) -> Self {
170        Self::new(api_key, MODEL_GEMINI_31_PRO.to_owned())
171    }
172
173    /// Set the provider-owned thinking configuration for this model.
174    #[must_use]
175    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
176        self.thinking = Some(thinking);
177        self
178    }
179
180    /// Override the base URL.
181    #[must_use]
182    pub fn with_base_url(mut self, base_url: impl Into<String>) -> Self {
183        self.base_url = base_url.into();
184        self
185    }
186
187    /// Send the API key via `x-goog-api-key` header instead of `?key=` query
188    /// parameter. Required when routing through proxies.
189    #[must_use]
190    pub const fn with_header_auth(mut self) -> Self {
191        self.use_header_auth = true;
192        self
193    }
194
195    /// Add extra HTTP headers applied to every request.
196    #[must_use]
197    pub fn with_extra_headers(mut self, headers: Vec<(String, String)>) -> Self {
198        self.extra_headers = headers;
199        self
200    }
201
202    /// Apply auth + extra headers. Skips provider auth when `api_key` is
203    /// empty (BYOK gateway mode).
204    fn apply_auth(&self, builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
205        let builder = if self.api_key.is_empty() {
206            builder
207        } else if self.use_header_auth {
208            builder.header("x-goog-api-key", &self.api_key)
209        } else {
210            builder.query(&[("key", &self.api_key)])
211        };
212        self.extra_headers
213            .iter()
214            .fold(builder, |b, (k, v)| b.header(k.as_str(), v.as_str()))
215    }
216}
217
218#[async_trait]
219#[allow(clippy::too_many_lines)]
220impl LlmProvider for GeminiProvider {
221    async fn chat(&self, request: ChatRequest) -> Result<ChatOutcome> {
222        let thinking = match self.resolve_thinking_config(request.thinking.as_ref()) {
223            Ok(thinking) => thinking,
224            Err(error) => return Ok(ChatOutcome::InvalidRequest(error.to_string())),
225        };
226        if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
227            return Ok(ChatOutcome::InvalidRequest(error.to_string()));
228        }
229        let contents = build_api_contents(&request.messages);
230        let tools = request
231            .tools
232            .as_ref()
233            .map(|t| convert_tools_to_config(t.clone()));
234        let tool_config = request
235            .tool_choice
236            .as_ref()
237            .map(ApiFunctionCallingConfig::from_tool_choice);
238        let system_instruction = if request.system.is_empty() {
239            None
240        } else {
241            Some(ApiContent {
242                role: None,
243                parts: vec![ApiPart::Text {
244                    text: request.system.clone(),
245                    thought_signature: None,
246                }],
247            })
248        };
249
250        let thinking_config = thinking.as_ref().map(map_thinking_config);
251        let (response_mime_type, response_schema) =
252            request.response_format.as_ref().map_or((None, None), |rf| {
253                (
254                    Some("application/json"),
255                    Some(gemini_response_schema(&rf.schema)),
256                )
257            });
258
259        let max_tokens = self.effective_max_tokens(&request);
260        let api_request = ApiGenerateContentRequest {
261            contents: &contents,
262            system_instruction: system_instruction.as_ref(),
263            tools: tools.as_ref().map(std::slice::from_ref),
264            tool_config,
265            generation_config: Some(ApiGenerationConfig {
266                max_output_tokens: Some(max_tokens),
267                thinking_config,
268                response_mime_type,
269                response_schema,
270            }),
271            cached_content: request.cached_content.as_deref(),
272        };
273
274        log::debug!(
275            "Gemini LLM request model={} max_tokens={}",
276            self.model,
277            max_tokens
278        );
279
280        let builder = self
281            .client
282            .post(format!(
283                "{}/models/{}:generateContent",
284                self.base_url, self.model
285            ))
286            .header("Content-Type", "application/json")
287            .timeout(std::time::Duration::from_secs(CHAT_READ_TIMEOUT_SECS));
288        let response = self
289            .apply_auth(builder)
290            .json(&api_request)
291            .send()
292            .await
293            .map_err(|e| anyhow::anyhow!("request failed: {e}"))?;
294
295        let status = response.status();
296        // Read `Retry-After` off the 429 response before the body is consumed.
297        let retry_after = if status == StatusCode::TOO_MANY_REQUESTS {
298            crate::http::retry_after_from_headers(response.headers())
299        } else {
300            None
301        };
302        let bytes = response
303            .bytes()
304            .await
305            .map_err(|e| anyhow::anyhow!("failed to read response body: {e}"))?;
306
307        log::debug!(
308            "Gemini LLM response status={} body_len={}",
309            status,
310            bytes.len()
311        );
312
313        if status == StatusCode::TOO_MANY_REQUESTS {
314            return Ok(ChatOutcome::RateLimited(retry_after));
315        }
316
317        if status.is_server_error() {
318            let body = String::from_utf8_lossy(&bytes);
319            log::error!("Gemini server error status={status} body={body}");
320            return Ok(ChatOutcome::ServerError(body.into_owned()));
321        }
322
323        if status.is_client_error() {
324            let body = String::from_utf8_lossy(&bytes);
325            log::warn!("Gemini client error status={status} body={body}");
326            return Ok(ChatOutcome::InvalidRequest(body.into_owned()));
327        }
328
329        let api_response: ApiGenerateContentResponse = serde_json::from_slice(&bytes)
330            .map_err(|e| anyhow::anyhow!("failed to parse response: {e}"))?;
331
332        let candidate = api_response
333            .candidates
334            .into_iter()
335            .next()
336            .ok_or_else(|| anyhow::anyhow!("no candidates in response"))?;
337
338        let content = build_content_blocks(&candidate.content);
339
340        if content.is_empty() && !candidate.content.parts.is_empty() {
341            log::warn!(
342                "Gemini parts not converted to content blocks raw_parts={:?}",
343                candidate.content.parts
344            );
345        }
346
347        let has_tool_calls = content
348            .iter()
349            .any(|b| matches!(b, agent_sdk_foundation::llm::ContentBlock::ToolUse { .. }));
350
351        let stop_reason = candidate
352            .finish_reason
353            .as_ref()
354            .map(|r| map_finish_reason(r, has_tool_calls));
355
356        let usage = api_response
357            .usage_metadata
358            .unwrap_or(ApiUsageMetadata {
359                prompt: 0,
360                candidates: 0,
361                cached_content: 0,
362            })
363            .into_usage();
364
365        Ok(ChatOutcome::Success(ChatResponse {
366            id: String::new(),
367            content,
368            model: self.model.clone(),
369            stop_reason,
370            usage,
371        }))
372    }
373
374    fn chat_stream(&self, request: ChatRequest) -> StreamBox<'_> {
375        Box::pin(async_stream::stream! {
376            let thinking = match self.resolve_thinking_config(request.thinking.as_ref()) {
377                Ok(thinking) => thinking,
378                Err(error) => {
379                    yield Ok(StreamDelta::Error {
380                        message: error.to_string(),
381                        kind: StreamErrorKind::InvalidRequest,
382                    });
383                    return;
384                }
385            };
386            if let Err(error) = validate_request_attachments(self.provider(), self.model(), &request) {
387                yield Ok(StreamDelta::Error {
388                    message: error.to_string(),
389                    kind: StreamErrorKind::InvalidRequest,
390                });
391                return;
392            }
393            let contents = build_api_contents(&request.messages);
394            let tools = request
395            .tools
396            .as_ref()
397            .map(|t| convert_tools_to_config(t.clone()));
398            let tool_config = request
399                .tool_choice
400                .as_ref()
401                .map(ApiFunctionCallingConfig::from_tool_choice);
402            let system_instruction = if request.system.is_empty() {
403                None
404            } else {
405                Some(ApiContent {
406                    role: None,
407                    parts: vec![ApiPart::Text {
408                        text: request.system.clone(),
409                        thought_signature: None,
410                    }],
411                })
412            };
413
414            let thinking_config = thinking.as_ref().map(map_thinking_config);
415            let (response_mime_type, response_schema) = request
416                .response_format
417                .as_ref()
418                .map_or((None, None), |rf| {
419                    (
420                        Some("application/json"),
421                        Some(gemini_response_schema(&rf.schema)),
422                    )
423                });
424
425            let max_tokens = self.effective_max_tokens(&request);
426            let api_request = ApiGenerateContentRequest {
427                contents: &contents,
428                system_instruction: system_instruction.as_ref(),
429                tools: tools.as_ref().map(std::slice::from_ref),
430                tool_config,
431                generation_config: Some(ApiGenerationConfig {
432                    max_output_tokens: Some(max_tokens),
433                    thinking_config,
434                    response_mime_type,
435                    response_schema,
436                }),
437                cached_content: request.cached_content.as_deref(),
438            };
439
440            log::debug!(
441                "Gemini streaming LLM request model={} max_tokens={}",
442                self.model,
443                max_tokens
444            );
445
446            let stream_builder = self
447                .client
448                .post(format!(
449                    "{}/models/{}:streamGenerateContent",
450                    self.base_url, self.model
451                ))
452                .header("Content-Type", "application/json")
453                .query(&[("alt", "sse")]);
454            let response = match self
455                .apply_auth(stream_builder)
456                .json(&api_request)
457                .send()
458                .await
459            {
460                Ok(r) => r,
461                Err(e) => {
462                    // Include the cause so 401 detection / diagnostics survive.
463                    yield Err(anyhow::anyhow!("request failed: {e}"));
464                    return;
465                }
466            };
467
468            let status = response.status();
469            if !status.is_success() {
470                let body = response.text().await.unwrap_or_default();
471                let kind = if status == StatusCode::TOO_MANY_REQUESTS {
472                    StreamErrorKind::RateLimited
473                } else if status.is_server_error() {
474                    StreamErrorKind::ServerError
475                } else {
476                    StreamErrorKind::InvalidRequest
477                };
478                log::warn!("Gemini error status={status} body={body}");
479                yield Ok(StreamDelta::Error {
480                    message: body,
481                    kind,
482                });
483                return;
484            }
485
486            let mut inner = data::stream_gemini_response(response);
487            while let Some(item) = futures::StreamExt::next(&mut inner).await {
488                yield item;
489            }
490        })
491    }
492
493    async fn list_models(&self) -> Result<Vec<crate::provider::ModelInfo>> {
494        // The endpoint paginates (default `pageSize=50`). Request the max page
495        // size and follow `nextPageToken` until exhausted, collecting *raw*
496        // rows. The `generateContent` filter is applied only after every page is
497        // in hand, so server-side truncation cannot hide a chat-capable model.
498        let mut rows: Vec<GeminiModelRow> = Vec::new();
499        let mut page_token: Option<String> = None;
500        for _ in 0..MODELS_MAX_PAGES {
501            let mut query: Vec<(&str, String)> = vec![("pageSize", MODELS_PAGE_SIZE.to_string())];
502            if let Some(token) = &page_token {
503                query.push(("pageToken", token.clone()));
504            }
505            let builder = self
506                .client
507                .get(format!("{}/models", self.base_url))
508                .header("Content-Type", "application/json")
509                .query(&query);
510            let builder = self.apply_auth(builder);
511            let body =
512                crate::impls::model_listing::fetch_model_list_body(builder, "Gemini").await?;
513            let page = parse_models_page(&body)?;
514            rows.extend(page.models);
515            match page.next_page_token {
516                Some(token) if !token.is_empty() => page_token = Some(token),
517                _ => break,
518            }
519        }
520        Ok(finalize_gemini_models(rows))
521    }
522
523    fn model(&self) -> &str {
524        &self.model
525    }
526
527    fn provider(&self) -> &'static str {
528        "gemini"
529    }
530
531    fn configured_thinking(&self) -> Option<&ThinkingConfig> {
532        self.thinking.as_ref()
533    }
534}
535
/// A raw Gemini model row, kept un-filtered so the `generateContent` filter can
/// be applied only *after* every page has been collected (so server-side page
/// truncation cannot hide a chat-capable model behind a page boundary).
///
/// Every field except `name` falls back to its default when missing from the
/// response.
#[derive(serde::Deserialize)]
struct GeminiModelRow {
    /// Resource name as returned by the API, e.g. `models/<id>`.
    name: String,
    /// Human-readable model name (`displayName`), when provided.
    #[serde(rename = "displayName", default)]
    display_name: Option<String>,
    /// `inputTokenLimit`; surfaced as `ModelInfo::context_window`.
    #[serde(rename = "inputTokenLimit", default)]
    input_token_limit: Option<u32>,
    /// `outputTokenLimit`; surfaced as `ModelInfo::max_output_tokens`.
    #[serde(rename = "outputTokenLimit", default)]
    output_token_limit: Option<u32>,
    /// `supportedGenerationMethods`; empty when the field is absent.
    #[serde(rename = "supportedGenerationMethods", default)]
    supported_generation_methods: Vec<String>,
}
551
/// One page of the Gemini `ListModels` response: raw rows plus the cursor used
/// to follow pagination.
struct GeminiModelsPage {
    /// Un-filtered model rows contained in this page.
    models: Vec<GeminiModelRow>,
    /// Cursor for the next page; `list_models` stops when it is `None` or empty.
    next_page_token: Option<String>,
}
558
559/// Parse one page of the Gemini `GET /v1beta/models` response body.
560///
561/// The endpoint returns `{ "models": [{ "name": "models/<id>", "displayName",
562/// "inputTokenLimit", "outputTokenLimit", "supportedGenerationMethods" }],
563/// "nextPageToken": "..." }`. It paginates with a default `pageSize` of 50;
564/// `nextPageToken` drives the next request. Raw rows are returned un-filtered so
565/// the caller can apply the `generateContent` filter once all pages are in hand.
566fn parse_models_page(body: &str) -> Result<GeminiModelsPage> {
567    #[derive(serde::Deserialize)]
568    struct ListResponse {
569        #[serde(default)]
570        models: Vec<GeminiModelRow>,
571        #[serde(rename = "nextPageToken", default)]
572        next_page_token: Option<String>,
573    }
574    let parsed: ListResponse = serde_json::from_str(body)
575        .map_err(|e| anyhow::anyhow!("failed to parse Gemini models list: {e}"))?;
576    Ok(GeminiModelsPage {
577        models: parsed.models,
578        next_page_token: parsed.next_page_token,
579    })
580}
581
582/// Filter accumulated rows to chat-capable models and project them into
583/// [`ModelInfo`].
584///
585/// Entries that do not support `generateContent` (e.g. embedding-only models)
586/// are dropped, and the `models/` prefix is stripped from `name` to recover the
587/// bare model id the chat endpoint expects. Applied *after* all pages are
588/// collected so a chat-capable model never gets hidden by page truncation.
589fn finalize_gemini_models(rows: Vec<GeminiModelRow>) -> Vec<crate::provider::ModelInfo> {
590    rows.into_iter()
591        .filter(|row| {
592            row.supported_generation_methods.is_empty()
593                || row
594                    .supported_generation_methods
595                    .iter()
596                    .any(|m| m == "generateContent")
597        })
598        .map(|row| crate::provider::ModelInfo {
599            id: match row.name.strip_prefix("models/") {
600                Some(stripped) => stripped.to_owned(),
601                None => row.name.clone(),
602            },
603            display_name: row.display_name,
604            context_window: row.input_token_limit,
605            max_output_tokens: row.output_token_limit,
606        })
607        .collect()
608}
609
// Unit tests for model listing (parsing, pagination, filtering), the factory
// constructors, and the output-token budget resolution.
#[cfg(test)]
mod tests {
    use super::*;

    // Single-page `ListModels` body: one chat-capable model and one
    // embedding-only model, with no `nextPageToken`.
    const GEMINI_MODELS_FIXTURE: &str = r#"{
      "models": [
        {
          "name": "models/gemini-2.5-pro",
          "displayName": "Gemini 2.5 Pro",
          "inputTokenLimit": 1048576,
          "outputTokenLimit": 65536,
          "supportedGenerationMethods": ["generateContent", "countTokens"]
        },
        {
          "name": "models/text-embedding-004",
          "displayName": "Text Embedding 004",
          "inputTokenLimit": 2048,
          "outputTokenLimit": 1,
          "supportedGenerationMethods": ["embedContent"]
        }
      ]
    }"#;

    // Parsing plus finalizing strips `models/`, maps the token limits, and
    // drops the embedding-only row.
    #[test]
    fn parse_models_page_strips_prefix_and_maps_limits() -> anyhow::Result<()> {
        let page = parse_models_page(GEMINI_MODELS_FIXTURE)?;
        let models = finalize_gemini_models(page.models);
        // The embedding-only model is filtered out (no `generateContent`).
        assert_eq!(models.len(), 1);
        let pro = &models[0];
        assert_eq!(pro.id, "gemini-2.5-pro");
        assert_eq!(pro.display_name.as_deref(), Some("Gemini 2.5 Pro"));
        assert_eq!(pro.context_window, Some(1_048_576));
        assert_eq!(pro.max_output_tokens, Some(65_536));
        assert_eq!(page.next_page_token, None);
        Ok(())
    }

    // End-to-end pagination against a mock server: two pages are fetched and
    // the filter is applied to the combined rows.
    #[tokio::test]
    async fn list_models_follows_pagination_and_filters_after_all_pages() -> anyhow::Result<()> {
        use wiremock::matchers::{method, path, query_param, query_param_is_missing};
        use wiremock::{Mock, MockServer, ResponseTemplate};

        let server = MockServer::start().await;

        // Page 1: a chat model plus an embedding-only model, then a page token.
        // The embedding model must NOT be filtered out mid-pagination — the
        // filter runs only after every page is collected.
        Mock::given(method("GET"))
            .and(path("/models"))
            .and(query_param_is_missing("pageToken"))
            .respond_with(ResponseTemplate::new(200).set_body_string(
                r#"{
                  "models": [
                    {
                      "name": "models/gemini-2.5-pro",
                      "displayName": "Gemini 2.5 Pro",
                      "inputTokenLimit": 1048576,
                      "outputTokenLimit": 65536,
                      "supportedGenerationMethods": ["generateContent"]
                    },
                    {
                      "name": "models/text-embedding-004",
                      "displayName": "Embedding",
                      "supportedGenerationMethods": ["embedContent"]
                    }
                  ],
                  "nextPageToken": "page-2"
                }"#,
            ))
            .mount(&server)
            .await;

        // Page 2: requested with `pageToken=page-2`; final page (no token).
        Mock::given(method("GET"))
            .and(path("/models"))
            .and(query_param("pageToken", "page-2"))
            .respond_with(ResponseTemplate::new(200).set_body_string(
                r#"{
                  "models": [
                    {
                      "name": "models/gemini-3-flash",
                      "displayName": "Gemini 3 Flash",
                      "inputTokenLimit": 1048576,
                      "outputTokenLimit": 65536,
                      "supportedGenerationMethods": ["generateContent"]
                    }
                  ]
                }"#,
            ))
            .mount(&server)
            .await;

        let provider = GeminiProvider::new("test-key".to_string(), "gemini-test".to_string())
            .with_base_url(server.uri());
        let models = provider.list_models().await?;

        // Both chat models from both pages are returned; the embedding-only
        // model is dropped by the post-pagination filter.
        let ids: Vec<&str> = models.iter().map(|m| m.id.as_str()).collect();
        assert_eq!(ids, vec!["gemini-2.5-pro", "gemini-3-flash"]);
        Ok(())
    }

    // `new` stores the caller-supplied model id verbatim.
    #[test]
    fn test_new_creates_provider_with_custom_model() {
        let provider = GeminiProvider::new("test-api-key".to_string(), "custom-model".to_string());

        assert_eq!(provider.model(), "custom-model");
        assert_eq!(provider.provider(), "gemini");
    }

    // Each factory below must select its documented model constant.
    #[test]
    fn test_flash_factory_creates_flash_provider() {
        let provider = GeminiProvider::flash("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GEMINI_3_FLASH);
        assert_eq!(provider.provider(), "gemini");
    }

    #[test]
    fn test_flash_lite_factory_creates_flash_lite_provider() {
        let provider = GeminiProvider::flash_lite("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GEMINI_2_FLASH_LITE);
        assert_eq!(provider.provider(), "gemini");
    }

    #[test]
    fn test_flash_lite_31_factory_creates_flash_lite_provider() {
        let provider = GeminiProvider::flash_lite_31("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GEMINI_31_FLASH_LITE);
        assert_eq!(provider.provider(), "gemini");
    }

    #[test]
    fn test_pro_factory_creates_pro_provider() {
        let provider = GeminiProvider::pro("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GEMINI_31_PRO);
        assert_eq!(provider.provider(), "gemini");
    }

    #[test]
    fn test_pro_31_factory_creates_pro_provider() {
        let provider = GeminiProvider::pro_31("test-api-key".to_string());

        assert_eq!(provider.model(), MODEL_GEMINI_31_PRO);
        assert_eq!(provider.provider(), "gemini");
    }

    // Pins the public model id strings so an accidental edit is caught.
    #[test]
    fn test_model_constants_have_expected_values() {
        assert_eq!(MODEL_GEMINI_31_PRO, "gemini-3.1-pro-preview");
        assert_eq!(MODEL_GEMINI_31_FLASH_LITE, "gemini-3.1-flash-lite-preview");
        assert_eq!(MODEL_GEMINI_3_FLASH, "gemini-3-flash-preview");
        assert_eq!(MODEL_GEMINI_3_PRO, "gemini-3.0-pro");
        assert_eq!(MODEL_GEMINI_25_FLASH, "gemini-2.5-flash");
        assert_eq!(MODEL_GEMINI_25_PRO, "gemini-2.5-pro");
        assert_eq!(MODEL_GEMINI_2_FLASH, "gemini-2.0-flash");
        assert_eq!(MODEL_GEMINI_2_FLASH_LITE, "gemini-2.0-flash-lite");
    }

    // Requesting thinking on a Gemini 2.0 model must be rejected.
    #[test]
    fn test_gemini_20_models_reject_thinking() {
        let provider = GeminiProvider::flash_lite("test-api-key".to_string());
        let error = provider
            .validate_thinking_config(Some(&ThinkingConfig::new(10_000)))
            .unwrap_err();
        assert!(error.to_string().contains("thinking is not supported"));
    }

    // Header auth is the default so the key is not placed in the URL.
    #[test]
    fn test_default_uses_header_auth() {
        let provider = GeminiProvider::new("test-key".to_string(), "model".to_string());
        assert!(
            provider.use_header_auth,
            "Default should use header auth for security"
        );
    }

    // A clone reports the same model and provider as the original.
    #[test]
    fn test_provider_is_cloneable() {
        let provider = GeminiProvider::new("test-api-key".to_string(), "test-model".to_string());
        let cloned = provider.clone();

        assert_eq!(provider.model(), cloned.model());
        assert_eq!(provider.provider(), cloned.provider());
    }

    // Minimal single-message request with the given token budget and
    // explicitness flag; every optional field is left unset.
    fn request_with_max_tokens(max_tokens: u32, explicit: bool) -> ChatRequest {
        ChatRequest {
            system: String::new(),
            messages: vec![agent_sdk_foundation::llm::Message::user("hi")],
            tools: None,
            max_tokens,
            max_tokens_explicit: explicit,
            session_id: None,
            cached_content: None,
            thinking: None,
            tool_choice: None,
            response_format: None,
            cache: None,
        }
    }

    // An explicit budget is passed through unchanged.
    #[test]
    fn test_effective_max_tokens_honors_explicit_budget() {
        let provider = GeminiProvider::pro("test-api-key".to_string());
        let request = request_with_max_tokens(123, true);
        assert_eq!(provider.effective_max_tokens(&request), 123);
    }

    #[test]
    fn test_effective_max_tokens_uses_default_when_implicit() {
        // An implicit budget must fall back to the provider/model default, not
        // be silently capped at ChatRequest::DEFAULT_MAX_TOKENS.
        let provider = GeminiProvider::pro("test-api-key".to_string());
        let request = request_with_max_tokens(4096, false);
        assert_eq!(
            provider.effective_max_tokens(&request),
            provider.default_max_tokens()
        );
    }
}