// rainy_sdk/models.rs

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Represents a single message in a chat conversation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content of the message.
    pub content: String,
}

/// The role of a message's author.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum MessageRole {
    /// A message from the system, setting the context or instructions for the assistant.
    System,
    /// A message from the user.
    User,
    /// A message from the assistant.
    Assistant,
}

/// Represents a request to create a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionRequest {
    /// The identifier of the model to use for the completion (e.g., "gpt-4o", "claude-sonnet-4").
    pub model: String,

    /// A list of messages that form the conversation history.
    pub messages: Vec<ChatMessage>,

    /// The sampling temperature to use, between 0.0 and 2.0. Higher values will make the output
    /// more random, while lower values will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The maximum number of tokens to generate in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,

    /// The nucleus sampling parameter. The model considers the results of the tokens with `top_p`
    /// probability mass. So, 0.1 means only the tokens comprising the top 10% probability mass are considered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// A penalty applied to new tokens based on their frequency in the text so far.
    /// It decreases the model's likelihood to repeat the same line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// A penalty applied to new tokens based on whether they appear in the text so far.
    /// It increases the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// A list of sequences that will cause the model to stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// A unique identifier representing your end-user, which can help in monitoring and
    /// tracking conversations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,

    /// A hint to the router about which provider to use for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,

    /// If set to `true`, the response will be streamed as a series of events.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// Modify the likelihood of specified tokens appearing in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<serde_json::Value>,

    /// Whether to return log probabilities of the output tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<bool>,

    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<u32>,

    /// How many chat completion choices to generate for each input message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub n: Option<u32>,

    /// An object specifying the format that the model must output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,

    /// A list of tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,

    /// Controls which (if any) tool is called by the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,

    /// Configuration for thinking capabilities (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
}

/// Represents the response from a chat completion request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,

    /// The type of object, which is always "chat.completion".
    pub object: String,

    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,

    /// The model that was used for the completion.
    pub model: String,

    /// A list of chat completion choices.
    pub choices: Vec<ChatChoice>,

    /// Information about the token usage for this completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,

    /// The message generated by the model.
    pub message: ChatMessage,

    /// The reason the model stopped generating tokens.
    pub finish_reason: String,
}

/// Represents the token usage statistics for a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,

    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,

    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
}

/// Represents the health status of the Rainy API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthStatus {
    /// The overall status of the API (e.g., "healthy", "degraded").
    pub status: String,

    /// The timestamp of when the health check was performed.
    pub timestamp: String,

    /// The uptime of the system in seconds.
    pub uptime: f64,

    /// The status of individual services.
    pub services: ServiceStatus,
}

/// Represents the status of individual backend services.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceStatus {
    /// The status of the database connection.
    pub database: bool,

    /// The status of the Redis connection, if applicable.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub redis: Option<bool>,

    /// The overall status of the connections to AI providers.
    pub providers: bool,
}

/// Represents the available models and providers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AvailableModels {
    /// A map where keys are provider names and values are lists of model names.
    pub providers: HashMap<String, Vec<String>>,

    /// The total number of available models across all providers.
    pub total_models: usize,

    /// A list of provider names that are currently active and available.
    pub active_providers: Vec<String>,
}

/// Represents information about credit usage for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditInfo {
    /// The number of credits available before the request.
    pub current_credits: f64,

    /// The estimated number of credits that the request will cost.
    pub estimated_cost: f64,

    /// The estimated number of credits remaining after the request.
    pub credits_after_request: f64,

    /// The date when the credit balance is next scheduled to be reset.
    pub reset_date: String,
}

/// Represents metadata extracted from the response headers of an API request.
#[derive(Debug, Clone)]
pub struct RequestMetadata {
    /// The time taken for the request to complete, in milliseconds.
    pub response_time: Option<u64>,

    /// The AI provider that handled the request.
    pub provider: Option<String>,

    /// The number of tokens used in the request.
    pub tokens_used: Option<u32>,

    /// The number of credits used for the request.
    pub credits_used: Option<f64>,

    /// The number of credits remaining after the request.
    pub credits_remaining: Option<f64>,

    /// The unique ID of the request, for tracking and debugging.
    pub request_id: Option<String>,
}

/// A collection of predefined model constants for convenience.
/// All models listed here are confirmed to be 100% OpenAI-compatible without parameter adaptations.
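///
/// # Example
///
/// A minimal sketch using a constant instead of a string literal (the
/// `rainy_sdk::models` import path is assumed from this file's location):
///
/// ```
/// use rainy_sdk::models::{model_constants, ChatCompletionRequest, ChatMessage};
///
/// let request = ChatCompletionRequest::new(
///     model_constants::OPENAI_GPT_4O,
///     vec![ChatMessage::user("Hello!")],
/// );
/// assert_eq!(request.model, "gpt-4o");
/// ```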
pub mod model_constants {
    // OpenAI models (fully compatible)
    /// Constant for the GPT-4o model.
    pub const OPENAI_GPT_4O: &str = "gpt-4o";
    /// Constant for the GPT-5 model.
    pub const OPENAI_GPT_5: &str = "gpt-5";
    /// Constant for the GPT-5 Pro model.
    pub const OPENAI_GPT_5_PRO: &str = "gpt-5-pro";
    /// Constant for the O3 model.
    pub const OPENAI_O3: &str = "o3";
    /// Constant for the O4 Mini model.
    pub const OPENAI_O4_MINI: &str = "o4-mini";

    // Google Gemini models (fully compatible via official compatibility layer)
    /// Constant for the Gemini 2.5 Pro model.
    pub const GOOGLE_GEMINI_2_5_PRO: &str = "gemini-2.5-pro";
    /// Constant for the Gemini 2.5 Flash model.
    pub const GOOGLE_GEMINI_2_5_FLASH: &str = "gemini-2.5-flash";
    /// Constant for the Gemini 2.5 Flash Lite model.
    pub const GOOGLE_GEMINI_2_5_FLASH_LITE: &str = "gemini-2.5-flash-lite";

    // Gemini 3 series - Advanced reasoning models with thinking capabilities
    /// Constant for the Gemini 3 Pro model with advanced reasoning.
    pub const GOOGLE_GEMINI_3_PRO: &str = "gemini-3-pro-preview";
    /// Constant for the Gemini 3 Flash model with thinking capabilities.
    pub const GOOGLE_GEMINI_3_FLASH: &str = "gemini-3-flash-preview";
    /// Constant for the Gemini 3 Pro Image model with multimodal reasoning.
    pub const GOOGLE_GEMINI_3_PRO_IMAGE: &str = "gemini-3-pro-image-preview";

    // Groq models (fully compatible)
    /// Constant for the Llama 3.1 8B Instant model.
    pub const GROQ_LLAMA_3_1_8B_INSTANT: &str = "llama-3.1-8b-instant";
    /// Constant for the Llama 3.3 70B Versatile model.
    pub const GROQ_LLAMA_3_3_70B_VERSATILE: &str = "llama-3.3-70b-versatile";
    /// Constant for the Moonshot AI Kimi K2 Instruct (0905) model.
    pub const KIMI_K2_0905: &str = "moonshotai/kimi-k2-instruct-0905";

    // Cerebras models (fully compatible)
    /// Constant for the Llama3.1 8B model.
    pub const CEREBRAS_LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";

    // Enosis Labs models (fully compatible)
    /// Constant for the Astronomer 1 model.
    pub const ASTRONOMER_1: &str = "astronomer-1";
    /// Constant for the Astronomer 1 Max model.
    pub const ASTRONOMER_1_MAX: &str = "astronomer-1-max";
    /// Constant for the Astronomer 1.5 model.
    pub const ASTRONOMER_1_5: &str = "astronomer-1.5";
    /// Constant for the Astronomer 2 model.
    pub const ASTRONOMER_2: &str = "astronomer-2";
    /// Constant for the Astronomer 2 Pro model.
    pub const ASTRONOMER_2_PRO: &str = "astronomer-2-pro";

    // Legacy aliases for backward compatibility (deprecated - use provider-prefixed versions above)
    /// Legacy constant for the GPT-4o model (use OPENAI_GPT_4O instead).
    #[deprecated(note = "Use OPENAI_GPT_4O instead for OpenAI compatibility")]
    pub const GPT_4O: &str = "openai/gpt-4o";
    /// Legacy constant for the GPT-5 model (use OPENAI_GPT_5 instead).
    #[deprecated(note = "Use OPENAI_GPT_5 instead for OpenAI compatibility")]
    pub const GPT_5: &str = "openai/gpt-5";
    /// Legacy constant for the Gemini 2.5 Pro model (use GOOGLE_GEMINI_2_5_PRO instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_PRO instead for OpenAI compatibility")]
    pub const GEMINI_2_5_PRO: &str = "google/gemini-2.5-pro";
    /// Legacy constant for the Gemini 2.5 Flash model (use GOOGLE_GEMINI_2_5_FLASH instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH: &str = "google/gemini-2.5-flash";
    /// Legacy constant for the Gemini 2.5 Flash Lite model (use GOOGLE_GEMINI_2_5_FLASH_LITE instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH_LITE instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH_LITE: &str = "google/gemini-2.5-flash-lite";
    /// Legacy constant for the Llama 3.1 8B Instant model (use GROQ_LLAMA_3_1_8B_INSTANT instead).
    #[deprecated(note = "Use GROQ_LLAMA_3_1_8B_INSTANT instead for OpenAI compatibility")]
    pub const LLAMA_3_1_8B_INSTANT: &str = "groq/llama-3.1-8b-instant";
    /// Legacy constant for the Llama3.1 8B model (use CEREBRAS_LLAMA3_1_8B instead).
    #[deprecated(note = "Use CEREBRAS_LLAMA3_1_8B instead for OpenAI compatibility")]
    pub const LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";
}

/// A collection of predefined provider name constants for convenience.
pub mod providers {
    /// Constant for the OpenAI provider.
    pub const OPENAI: &str = "openai";
    /// Constant for the Anthropic provider.
    pub const ANTHROPIC: &str = "anthropic";
    /// Constant for the Groq provider.
    pub const GROQ: &str = "groq";
    /// Constant for the Cerebras provider.
    pub const CEREBRAS: &str = "cerebras";
    /// Constant for the Gemini provider.
    pub const GEMINI: &str = "gemini";
    /// Constant for the Enosis Labs provider.
    pub const ENOSISLABS: &str = "enosislabs";
}

impl ChatCompletionRequest {
    /// Creates a new `ChatCompletionRequest` with the given model and messages.
    ///
    /// # Arguments
    ///
    /// * `model` - The identifier of the model to use.
    /// * `messages` - The list of messages for the conversation.
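    ///
    /// # Example
    ///
    /// A minimal sketch of building a request with the fluent builders (the
    /// `rainy_sdk::models` import path is assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new(
    ///     "gpt-4o",
    ///     vec![
    ///         ChatMessage::system("You are a helpful assistant."),
    ///         ChatMessage::user("Hello!"),
    ///     ],
    /// )
    /// .with_temperature(0.7)
    /// .with_max_tokens(256);
    ///
    /// assert_eq!(request.model, "gpt-4o");
    /// ```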
    pub fn new(model: impl Into<String>, messages: Vec<ChatMessage>) -> Self {
        Self {
            model: model.into(),
            messages,
            temperature: None,
            max_tokens: None,
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
            stop: None,
            user: None,
            provider: None,
            stream: None,
            logit_bias: None,
            logprobs: None,
            top_logprobs: None,
            n: None,
            response_format: None,
            tools: None,
            tool_choice: None,
            thinking_config: None,
        }
    }

    /// Sets the temperature for the chat completion.
    ///
    /// The temperature is clamped between 0.0 and 2.0.
    ///
    /// # Arguments
    ///
    /// * `temperature` - The sampling temperature.
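    ///
    /// # Example
    ///
    /// A small sketch showing the clamping behavior (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("hi")])
    ///     .with_temperature(3.5); // out of range, clamped to 2.0
    /// assert_eq!(request.temperature, Some(2.0));
    /// ```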
    pub fn with_temperature(mut self, temperature: f32) -> Self {
        self.temperature = Some(temperature.clamp(0.0, 2.0));
        self
    }

    /// Sets the maximum number of tokens to generate.
    ///
    /// # Arguments
    ///
    /// * `max_tokens` - The maximum number of tokens.
    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
        self.max_tokens = Some(max_tokens);
        self
    }

    /// Sets the user identifier for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `user` - A unique identifier for the end-user.
    pub fn with_user(mut self, user: impl Into<String>) -> Self {
        self.user = Some(user.into());
        self
    }

    /// Sets a provider hint for the request.
    ///
    /// # Arguments
    ///
    /// * `provider` - The name of the provider to use.
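    ///
    /// # Example
    ///
    /// A sketch pinning a request to a specific provider via the `providers`
    /// constants (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{providers, ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new("llama-3.1-8b-instant", vec![ChatMessage::user("hi")])
    ///     .with_provider(providers::GROQ);
    /// assert_eq!(request.provider.as_deref(), Some("groq"));
    /// ```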
    pub fn with_provider(mut self, provider: impl Into<String>) -> Self {
        self.provider = Some(provider.into());
        self
    }

    /// Enables or disables streaming for the response.
    ///
    /// # Arguments
    ///
    /// * `stream` - `true` to enable streaming, `false` to disable.
    pub fn with_stream(mut self, stream: bool) -> Self {
        self.stream = Some(stream);
        self
    }

    /// Sets the logit bias for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `logit_bias` - A map of token IDs to bias values.
    pub fn with_logit_bias(mut self, logit_bias: serde_json::Value) -> Self {
        self.logit_bias = Some(logit_bias);
        self
    }

    /// Enables or disables log probabilities for the response.
    ///
    /// # Arguments
    ///
    /// * `logprobs` - `true` to include log probabilities.
    pub fn with_logprobs(mut self, logprobs: bool) -> Self {
        self.logprobs = Some(logprobs);
        self
    }

    /// Sets the number of most likely tokens to return at each position.
    ///
    /// # Arguments
    ///
    /// * `top_logprobs` - The number of top log probabilities to return.
    pub fn with_top_logprobs(mut self, top_logprobs: u32) -> Self {
        self.top_logprobs = Some(top_logprobs);
        self
    }

    /// Sets the number of chat completion choices to generate.
    ///
    /// # Arguments
    ///
    /// * `n` - The number of completions to generate.
    pub fn with_n(mut self, n: u32) -> Self {
        self.n = Some(n);
        self
    }

    /// Sets the response format for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `response_format` - The format the model must output.
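    ///
    /// # Example
    ///
    /// A sketch requesting JSON output; whether a given model honors the format
    /// is provider-dependent (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage, ResponseFormat};
    ///
    /// let request = ChatCompletionRequest::new(
    ///     "gpt-4o",
    ///     vec![ChatMessage::user("List three colors as a JSON object.")],
    /// )
    /// .with_response_format(ResponseFormat::JsonObject);
    /// assert!(request.response_format.is_some());
    /// ```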
    pub fn with_response_format(mut self, response_format: ResponseFormat) -> Self {
        self.response_format = Some(response_format);
        self
    }

    /// Sets the tools available to the model.
    ///
    /// # Arguments
    ///
    /// * `tools` - A list of tools the model can use.
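    ///
    /// # Example
    ///
    /// A sketch with a hypothetical `get_weather` function; the schema is plain
    /// JSON Schema passed through `serde_json` (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage, FunctionDefinition, Tool, ToolType};
    ///
    /// // Hypothetical tool definition for illustration only.
    /// let weather_tool = Tool {
    ///     r#type: ToolType::Function,
    ///     function: FunctionDefinition {
    ///         name: "get_weather".to_string(),
    ///         description: Some("Look up the current weather for a city.".to_string()),
    ///         parameters: Some(serde_json::json!({
    ///             "type": "object",
    ///             "properties": { "city": { "type": "string" } },
    ///             "required": ["city"]
    ///         })),
    ///     },
    /// };
    ///
    /// let request = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("Weather in Paris?")])
    ///     .with_tools(vec![weather_tool]);
    /// ```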
    pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
        self.tools = Some(tools);
        self
    }

    /// Sets the tool choice for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `tool_choice` - Controls which tool the model uses.
    pub fn with_tool_choice(mut self, tool_choice: ToolChoice) -> Self {
        self.tool_choice = Some(tool_choice);
        self
    }

    /// Sets the thinking configuration for Gemini 3 and 2.5 series models.
    ///
    /// # Arguments
    ///
    /// * `thinking_config` - Configuration for thinking capabilities.
    pub fn with_thinking_config(mut self, thinking_config: ThinkingConfig) -> Self {
        self.thinking_config = Some(thinking_config);
        self
    }

    /// Enables thought summaries in the response (Gemini 3 and 2.5 series).
    ///
    /// # Arguments
    ///
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn with_include_thoughts(mut self, include_thoughts: bool) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.include_thoughts = Some(include_thoughts);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking level for Gemini 3 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_level` - The thinking level (minimal, low, medium, high).
    pub fn with_thinking_level(mut self, thinking_level: ThinkingLevel) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_level = Some(thinking_level);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking budget for Gemini 2.5 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_budget` - Number of thinking tokens (-1 for dynamic, 0 to disable).
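    ///
    /// # Example
    ///
    /// A sketch enabling a dynamic thinking budget for a Gemini 2.5 model
    /// (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new("gemini-2.5-flash", vec![ChatMessage::user("hi")])
    ///     .with_thinking_budget(-1); // -1 lets the model decide how much to think
    /// assert!(request.validate_openai_compatibility().is_ok());
    /// ```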
    pub fn with_thinking_budget(mut self, thinking_budget: i32) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_budget = Some(thinking_budget);
        self.thinking_config = Some(config);
        self
    }

    /// Validates that the request parameters are compatible with OpenAI standards.
    ///
    /// This method checks parameter ranges and values to ensure they match OpenAI's API specifications.
    /// It also validates Gemini-specific parameters like the thinking configuration.
    ///
    /// # Returns
    ///
    /// A `Result` indicating whether the request is valid for OpenAI compatibility.
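    ///
    /// # Example
    ///
    /// A sketch of a passing and a failing request; the invalid temperature is
    /// set directly because `with_temperature` would clamp it (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let ok = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("hi")]);
    /// assert!(ok.validate_openai_compatibility().is_ok());
    ///
    /// let mut bad = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("hi")]);
    /// bad.temperature = Some(5.0); // bypasses the clamping builder
    /// assert!(bad.validate_openai_compatibility().is_err());
    /// ```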
    pub fn validate_openai_compatibility(&self) -> Result<(), String> {
        // Validate temperature
        if let Some(temp) = self.temperature {
            if !(0.0..=2.0).contains(&temp) {
                return Err(format!(
                    "Temperature must be between 0.0 and 2.0, got {}",
                    temp
                ));
            }
        }

        // Validate top_p
        if let Some(top_p) = self.top_p {
            if !(0.0..=1.0).contains(&top_p) {
                return Err(format!("Top-p must be between 0.0 and 1.0, got {}", top_p));
            }
        }

        // Validate frequency_penalty
        if let Some(fp) = self.frequency_penalty {
            if !(-2.0..=2.0).contains(&fp) {
                return Err(format!(
                    "Frequency penalty must be between -2.0 and 2.0, got {}",
                    fp
                ));
            }
        }

        // Validate presence_penalty
        if let Some(pp) = self.presence_penalty {
            if !(-2.0..=2.0).contains(&pp) {
                return Err(format!(
                    "Presence penalty must be between -2.0 and 2.0, got {}",
                    pp
                ));
            }
        }

        // Validate max_tokens
        if let Some(mt) = self.max_tokens {
            if mt == 0 {
                return Err("Max tokens must be greater than 0".to_string());
            }
        }

        // Validate top_logprobs
        if let Some(tlp) = self.top_logprobs {
            if !(0..=20).contains(&tlp) {
                return Err(format!(
                    "Top logprobs must be between 0 and 20, got {}",
                    tlp
                ));
            }
        }

        // Validate n
        if let Some(n) = self.n {
            if n == 0 {
                return Err("n must be greater than 0".to_string());
            }
        }

        // Validate stop sequences
        if let Some(stop) = &self.stop {
            if stop.len() > 4 {
                return Err("Cannot have more than 4 stop sequences".to_string());
            }
            for seq in stop {
                if seq.is_empty() {
                    return Err("Stop sequences cannot be empty".to_string());
                }
                if seq.chars().count() > 64 {
                    return Err("Stop sequences cannot be longer than 64 characters".to_string());
                }
            }
        }

        // Validate thinking configuration for Gemini models
        if let Some(thinking_config) = &self.thinking_config {
            self.validate_thinking_config(thinking_config)?;
        }

        Ok(())
    }

    /// Validates thinking configuration parameters for Gemini models.
    fn validate_thinking_config(&self, config: &ThinkingConfig) -> Result<(), String> {
        let is_gemini_3 = self.model.contains("gemini-3");
        let is_gemini_2_5 = self.model.contains("gemini-2.5");
        let is_gemini_3_pro = self.model.contains("gemini-3-pro");

        // Validate thinking level (Gemini 3 only)
        if let Some(level) = &config.thinking_level {
            if !is_gemini_3 {
                return Err("thinking_level is only supported for Gemini 3 models".to_string());
            }

            match level {
                ThinkingLevel::Minimal | ThinkingLevel::Medium => {
                    if is_gemini_3_pro {
                        return Err(
                            "Gemini 3 Pro only supports 'low' and 'high' thinking levels"
                                .to_string(),
                        );
                    }
                }
                _ => {}
            }
        }

        // Validate thinking budget (Gemini 2.5 only)
        if let Some(budget) = config.thinking_budget {
            if !is_gemini_2_5 {
                return Err("thinking_budget is only supported for Gemini 2.5 models".to_string());
            }

            // Validate budget ranges based on model
            if self.model.contains("2.5-pro") {
                if budget != -1 && !(128..=32768).contains(&budget) {
                    return Err(
                        "Gemini 2.5 Pro thinking budget must be -1 (dynamic) or between 128-32768"
                            .to_string(),
                    );
                }
            } else if self.model.contains("2.5-flash")
                && budget != -1
                && !(0..=24576).contains(&budget)
            {
                return Err(
                    "Gemini 2.5 Flash thinking budget must be -1 (dynamic) or between 0-24576"
                        .to_string(),
                );
            }
        }

        // Reject conflicting parameters: thinking_level targets Gemini 3 while
        // thinking_budget targets Gemini 2.5, so a request may set at most one.
        if config.thinking_level.is_some() && config.thinking_budget.is_some() {
            return Err(
                "Cannot specify both thinking_level (Gemini 3) and thinking_budget (Gemini 2.5) in the same request"
                    .to_string(),
            );
        }

        Ok(())
    }

    /// Checks if the model supports thinking capabilities.
    pub fn supports_thinking(&self) -> bool {
        self.model.contains("gemini-3") || self.model.contains("gemini-2.5")
    }

    /// Checks if the model requires thought signatures for function calling.
    pub fn requires_thought_signatures(&self) -> bool {
        self.model.contains("gemini-3")
    }
}

impl ChatMessage {
    /// Creates a new message with the `System` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the system message.
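    ///
    /// # Example
    ///
    /// A minimal sketch (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatMessage, MessageRole};
    ///
    /// let message = ChatMessage::system("You are a helpful assistant.");
    /// assert_eq!(message.role, MessageRole::System);
    /// ```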
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            content: content.into(),
        }
    }

    /// Creates a new message with the `User` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the user message.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            content: content.into(),
        }
    }

    /// Creates a new message with the `Assistant` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the assistant message.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            content: content.into(),
        }
    }
}

// Legacy compatibility types - keep existing types for backward compatibility
use uuid::Uuid;

/// Represents a user account (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct User {
    /// The unique ID of the user.
    pub id: Uuid,
    /// The user's identifier string.
    pub user_id: String,
    /// The name of the user's subscription plan.
    pub plan_name: String,
    /// The user's current credit balance.
    pub current_credits: f64,
    /// The amount of credits the user has used in the current month.
    pub credits_used_this_month: f64,
    /// The date when the user's credits will reset.
    pub credits_reset_date: DateTime<Utc>,
    /// Indicates if the user account is active.
    pub is_active: bool,
    /// The timestamp of when the user account was created.
    pub created_at: DateTime<Utc>,
}

/// Represents an API key (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKey {
    /// The unique ID of the API key.
    pub id: Uuid,
    /// The API key string.
    pub key: String,
    /// The ID of the user who owns the key.
    pub owner_id: Uuid,
    /// Indicates if the API key is active.
    pub is_active: bool,
    /// The timestamp of when the key was created.
    pub created_at: DateTime<Utc>,
    /// The expiration date of the key, if any.
    pub expires_at: Option<DateTime<Utc>>,
    /// A description of the key.
    pub description: Option<String>,
    /// The timestamp of when the key was last used.
    pub last_used_at: Option<DateTime<Utc>>,
}

/// Represents usage statistics over a period (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageStats {
    /// The number of days in the usage period.
    pub period_days: u32,
    /// A list of daily usage data.
    pub daily_usage: Vec<DailyUsage>,
    /// A list of recent credit transactions.
    pub recent_transactions: Vec<CreditTransaction>,
    /// The total number of requests made in the period.
    pub total_requests: u64,
    /// The total number of tokens used in the period.
    pub total_tokens: u64,
}

/// Represents usage data for a single day (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DailyUsage {
    /// The date for the usage data.
    pub date: String,
    /// The number of credits used on this day.
    pub credits_used: f64,
    /// The number of requests made on this day.
    pub requests: u64,
    /// The number of tokens used on this day.
    pub tokens: u64,
}

/// Represents a single credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditTransaction {
    /// The unique ID of the transaction.
    pub id: Uuid,
    /// The type of the transaction.
    pub transaction_type: TransactionType,
    /// The amount of credits involved in the transaction.
    pub credits_amount: f64,
    /// The credit balance after the transaction.
    pub credits_balance_after: f64,
    /// The provider associated with the transaction, if any.
    pub provider: Option<String>,
    /// The model associated with the transaction, if any.
    pub model: Option<String>,
    /// A description of the transaction.
    pub description: String,
    /// The timestamp of when the transaction occurred.
    pub created_at: DateTime<Utc>,
}

/// The type of credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TransactionType {
    /// A transaction for API usage.
    Usage,
    /// A transaction for a credit reset.
    Reset,
    /// A transaction for a credit purchase.
    Purchase,
    /// A transaction for a credit refund.
    Refund,
}

// Legacy aliases for backward compatibility
/// A legacy type alias for `MessageRole`.
pub type ChatRole = MessageRole;
/// A legacy type alias for `Usage`.
pub type ChatUsage = Usage;
/// A legacy type alias for `HealthStatus`.
pub type HealthCheck = HealthStatus;

/// Represents the status of backend services (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthServices {
    /// The status of the database connection.
    pub database: bool,
    /// The status of the Redis connection.
    pub redis: bool,
    /// The overall status of AI providers.
    pub providers: bool,
}

/// The health status of the API (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatusEnum {
    /// The API is healthy.
    Healthy,
    /// The API is in a degraded state.
    Degraded,
    /// The API is unhealthy.
    Unhealthy,
    /// The API needs initialization.
    NeedsInit,
}

/// Represents the format that the model must output.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResponseFormat {
    /// The model can return text.
    Text,
    /// The model must return a valid JSON object.
    JsonObject,
    /// The model must return a JSON object that matches the provided schema.
    JsonSchema { json_schema: serde_json::Value },
}

/// Represents a tool that the model can use.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
    /// The type of the tool.
    pub r#type: ToolType,
    /// The function definition for the tool.
    pub function: FunctionDefinition,
}

/// The type of tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolType {
    /// A function tool.
    Function,
}

/// Represents a function definition for a tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDefinition {
    /// The name of the function.
    pub name: String,
    /// A description of what the function does.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// The parameters the function accepts, described as a JSON Schema object.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parameters: Option<serde_json::Value>,
}

/// Controls which tool is called by the model.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ToolChoice {
    /// No tool is called.
    None,
    /// The model chooses which tool to call.
    Auto,
    /// A specific tool is called. This variant is untagged so it serializes as a
    /// bare object, while `None` and `Auto` serialize as the strings "none" and
    /// "auto" (a fully untagged enum would serialize unit variants as `null`).
    #[serde(untagged)]
    Tool {
        r#type: ToolType,
        function: ToolFunction,
    },
}

/// Represents a tool function call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolFunction {
    /// The name of the function to call.
    pub name: String,
}

/// Configuration for thinking capabilities in Gemini 3 and 2.5 series models.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ThinkingConfig {
    /// Whether to include thought summaries in the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level for Gemini 3 models (low, high for Pro; minimal, low, medium, high for Flash).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,

    /// The thinking budget for Gemini 2.5 models (number of thinking tokens).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,
}

/// Thinking levels for Gemini 3 models.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ThinkingLevel {
    /// Minimal thinking (Gemini 3 Flash only) - model likely won't think.
    Minimal,
    /// Low thinking level - faster responses with basic reasoning.
    Low,
    /// Medium thinking level (Gemini 3 Flash only) - balanced reasoning and speed.
    Medium,
    /// High thinking level - deep reasoning for complex tasks (default).
    High,
}

/// Represents a content part that may include thought signatures.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ContentPart {
    /// The text content of the part.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,

    /// Function call information if this part contains a function call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_call: Option<FunctionCall>,

    /// Function response information if this part contains a function response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_response: Option<FunctionResponse>,

    /// Indicates if this part contains thought content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<bool>,

    /// Encrypted thought signature for preserving reasoning context across turns.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought_signature: Option<String>,
}

/// Represents a function call in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionCall {
    /// The name of the function being called.
    pub name: String,
    /// The arguments for the function call as a JSON object.
    pub args: serde_json::Value,
}

/// Represents a function response in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionResponse {
    /// The name of the function that was called.
    pub name: String,
    /// The response from the function call.
    pub response: serde_json::Value,
}

/// Enhanced chat message that supports Gemini 3 thinking capabilities.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnhancedChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content parts of the message (supports text, function calls, and thought signatures).
    pub parts: Vec<ContentPart>,
}

/// Enhanced usage statistics that include thinking tokens.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedUsage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,
    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
    /// The number of thinking tokens used (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<u32>,
}

impl ThinkingConfig {
    /// Creates a new thinking configuration with default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a configuration for Gemini 3 models with specified thinking level.
    ///
    /// # Arguments
    ///
    /// * `level` - The thinking level to use.
    /// * `include_thoughts` - Whether to include thought summaries.
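    ///
    /// # Example
    ///
    /// A minimal sketch (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ThinkingConfig, ThinkingLevel};
    ///
    /// let config = ThinkingConfig::gemini_3(ThinkingLevel::High, true);
    /// assert_eq!(config.include_thoughts, Some(true));
    /// assert!(config.thinking_budget.is_none());
    /// ```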
    pub fn gemini_3(level: ThinkingLevel, include_thoughts: bool) -> Self {
        Self {
            thinking_level: Some(level),
            include_thoughts: Some(include_thoughts),
            thinking_budget: None,
        }
    }

    /// Creates a configuration for Gemini 2.5 models with specified thinking budget.
    ///
    /// # Arguments
    ///
    /// * `budget` - The thinking budget (-1 for dynamic, 0 to disable, or specific token count).
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn gemini_2_5(budget: i32, include_thoughts: bool) -> Self {
        Self {
            thinking_budget: Some(budget),
            include_thoughts: Some(include_thoughts),
            thinking_level: None,
        }
    }

    /// Creates a configuration optimized for complex reasoning tasks on Gemini 3 models.
    ///
    /// Sets the `high` thinking level and enables thought summaries. For Gemini 2.5
    /// models, use [`ThinkingConfig::gemini_2_5`] with a dynamic budget (`-1`) instead,
    /// since validation rejects requests that set both `thinking_level` and
    /// `thinking_budget`.
    pub fn high_reasoning() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::High),
            include_thoughts: Some(true),
            thinking_budget: None,
        }
    }

    /// Creates a configuration optimized for fast responses on Gemini 3 models.
    ///
    /// Sets the `low` thinking level and disables thought summaries. For Gemini 2.5
    /// models, use [`ThinkingConfig::gemini_2_5`] with a small budget (e.g., 512) instead.
    pub fn fast_response() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::Low),
            include_thoughts: Some(false),
            thinking_budget: None,
        }
    }
}

impl ContentPart {
    /// Creates a new text content part.
    pub fn text(content: impl Into<String>) -> Self {
        Self {
            text: Some(content.into()),
            function_call: None,
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function call content part.
    pub fn function_call(name: impl Into<String>, args: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: Some(FunctionCall {
                name: name.into(),
                args,
            }),
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function response content part.
    pub fn function_response(name: impl Into<String>, response: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: None,
            function_response: Some(FunctionResponse {
                name: name.into(),
                response,
            }),
            thought: None,
            thought_signature: None,
        }
    }

    /// Adds a thought signature to this content part.
    pub fn with_thought_signature(mut self, signature: impl Into<String>) -> Self {
        self.thought_signature = Some(signature.into());
        self
    }

    /// Marks this content part as containing thought content.
    pub fn as_thought(mut self) -> Self {
        self.thought = Some(true);
        self
    }
}

impl EnhancedChatMessage {
    /// Creates a new enhanced message with the `System` role.
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `User` role.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `Assistant` role.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with multiple content parts.
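    ///
    /// # Example
    ///
    /// A sketch mixing a thought part with a hypothetical `get_weather` function
    /// call; the signature string is a stand-in for a real provider-issued value
    /// (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ContentPart, EnhancedChatMessage, MessageRole};
    ///
    /// let message = EnhancedChatMessage::with_parts(
    ///     MessageRole::Assistant,
    ///     vec![
    ///         ContentPart::text("Let me check the weather.").as_thought(),
    ///         ContentPart::function_call("get_weather", serde_json::json!({ "city": "Paris" }))
    ///             .with_thought_signature("opaque-signature"), // placeholder value
    ///     ],
    /// );
    /// assert_eq!(message.parts.len(), 2);
    /// ```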
    pub fn with_parts(role: MessageRole, parts: Vec<ContentPart>) -> Self {
        Self { role, parts }
    }
}

/// Represents a streaming chat completion response (OpenAI delta format).
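///
/// # Example
///
/// A sketch of accumulating streamed text; real chunks would arrive from the
/// wire, but the folding logic is the same (import path assumed):
///
/// ```
/// use rainy_sdk::models::ChatCompletionStreamDelta;
///
/// // Hand-built deltas standing in for parsed stream events.
/// let chunks = vec![
///     ChatCompletionStreamDelta { role: Some("assistant".into()), content: Some("Hel".into()), tool_calls: None },
///     ChatCompletionStreamDelta { role: None, content: Some("lo!".into()), tool_calls: None },
/// ];
///
/// let mut full = String::new();
/// for delta in chunks {
///     if let Some(piece) = delta.content {
///         full.push_str(&piece);
///     }
/// }
/// assert_eq!(full, "Hello!");
/// ```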
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamResponse {
    /// A unique identifier for the chat completion.
    pub id: String,
    /// The type of object, which is always "chat.completion.chunk".
    pub object: String,
    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,
    /// The model that was used for the completion.
    pub model: String,
    /// A list of chat completion choices.
    pub choices: Vec<ChatCompletionStreamChoice>,
    /// Information about the token usage for this completion (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
    /// The delta containing the new content for this choice.
    pub delta: ChatCompletionStreamDelta,
    /// The reason the model stopped generating tokens (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}

/// Represents the delta (change) in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamDelta {
    /// The role of the message (only present in the first chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    /// The new content for this chunk.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// Tool calls for this chunk (if any).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
}

/// Represents a tool call in a streaming response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// The index of the tool call.
    pub index: u32,
    /// The ID of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// The type of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#type: Option<String>,
    /// The function being called.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function: Option<ToolCallFunction>,
}

/// Represents a function call in a tool call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallFunction {
    /// The name of the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// The arguments for the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arguments: Option<String>,
}
1239}