rainy_sdk/models.rs

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Represents a single message in a chat conversation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content of the message.
    pub content: String,
}

/// The role of a message's author.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum MessageRole {
    /// A message from the system, setting the context or instructions for the assistant.
    System,
    /// A message from the user.
    User,
    /// A message from the assistant.
    Assistant,
}

/// Represents a request to create a chat completion.
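///
/// Optional fields are annotated with `skip_serializing_if`, so a freshly built
/// request serializes to just `model` and `messages`. A minimal sketch (the
/// `rainy_sdk::models` path is assumed from this file's location):
///
/// ```rust
/// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
///
/// let req = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("Hi")]);
/// let json = serde_json::to_value(&req).unwrap();
/// assert_eq!(json["model"], "gpt-4o");
/// assert!(json.get("temperature").is_none());
/// ```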
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionRequest {
    /// The identifier of the model to use for the completion (e.g., "gpt-4o", "claude-sonnet-4").
    pub model: String,

    /// A list of messages that form the conversation history.
    pub messages: Vec<ChatMessage>,

    /// The sampling temperature to use, between 0.0 and 2.0. Higher values will make the output
    /// more random, while lower values will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The maximum number of tokens to generate in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,

    /// The nucleus sampling parameter. The model considers the results of the tokens with `top_p`
    /// probability mass. So, 0.1 means only the tokens comprising the top 10% probability mass are considered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// A penalty applied to new tokens based on their frequency in the text so far.
    /// It decreases the model's likelihood to repeat the same line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// A penalty applied to new tokens based on whether they appear in the text so far.
    /// It increases the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// A list of sequences that will cause the model to stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// A unique identifier representing your end-user, which can help in monitoring and
    /// tracking conversations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,

    /// A hint to the router about which provider to use for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,

    /// If set to `true`, the response will be streamed as a series of events.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// Modify the likelihood of specified tokens appearing in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<serde_json::Value>,

    /// Whether to return log probabilities of the output tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<bool>,

    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<u32>,

    /// How many chat completion choices to generate for each input message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub n: Option<u32>,

    /// An object specifying the format that the model must output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,

    /// A list of tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,

    /// Controls which (if any) tool is called by the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,

    /// Configuration for thinking capabilities (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
}

/// Represents the response from a chat completion request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,

    /// The type of object, which is always "chat.completion".
    pub object: String,

    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,

    /// The model that was used for the completion.
    pub model: String,

    /// A list of chat completion choices.
    pub choices: Vec<ChatChoice>,

    /// Information about the token usage for this completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,

    /// The message generated by the model.
    pub message: ChatMessage,

    /// The reason the model stopped generating tokens.
    pub finish_reason: String,
}

/// Represents the token usage statistics for a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,

    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,

    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
}

/// Represents the health status of the Rainy API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthStatus {
    /// The overall status of the API (e.g., "healthy", "degraded").
    pub status: String,

    /// The timestamp of when the health check was performed.
    pub timestamp: String,

    /// The uptime of the system in seconds.
    pub uptime: f64,

    /// The status of individual services.
    pub services: ServiceStatus,
}

/// Represents the status of individual backend services.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceStatus {
    /// The status of the database connection.
    pub database: bool,

    /// The status of the Redis connection, if applicable.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub redis: Option<bool>,

    /// The overall status of the connections to AI providers.
    pub providers: bool,
}

/// Represents the available models and providers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AvailableModels {
    /// A map where keys are provider names and values are lists of model names.
    pub providers: HashMap<String, Vec<String>>,

    /// The total number of available models across all providers.
    pub total_models: usize,

    /// A list of provider names that are currently active and available.
    pub active_providers: Vec<String>,
}

/// Represents information about credit usage for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditInfo {
    /// The number of credits available before the request.
    pub current_credits: f64,

    /// The estimated number of credits that the request will cost.
    pub estimated_cost: f64,

    /// The estimated number of credits remaining after the request.
    pub credits_after_request: f64,

    /// The date when the credit balance is next scheduled to be reset.
    pub reset_date: String,
}

/// Represents metadata extracted from the response headers of an API request.
#[derive(Debug, Clone)]
pub struct RequestMetadata {
    /// The time taken for the request to complete, in milliseconds.
    pub response_time: Option<u64>,

    /// The AI provider that handled the request.
    pub provider: Option<String>,

    /// The number of tokens used in the request.
    pub tokens_used: Option<u32>,

    /// The number of credits used for the request.
    pub credits_used: Option<f64>,

    /// The number of credits remaining after the request.
    pub credits_remaining: Option<f64>,

    /// The unique ID of the request, for tracking and debugging.
    pub request_id: Option<String>,
}

/// A collection of predefined model constants for convenience.
/// All models listed here are confirmed to be 100% OpenAI-compatible without parameter adaptations.
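///
/// # Example
///
/// A minimal sketch (the `rainy_sdk::models` path is assumed):
///
/// ```rust
/// use rainy_sdk::models::model_constants;
///
/// let model = model_constants::OPENAI_GPT_4O;
/// assert_eq!(model, "gpt-4o");
/// ```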
pub mod model_constants {
    // OpenAI models (fully compatible)
    /// Constant for the GPT-4o model.
    pub const OPENAI_GPT_4O: &str = "gpt-4o";
    /// Constant for the GPT-5 model.
    pub const OPENAI_GPT_5: &str = "gpt-5";
    /// Constant for the GPT-5 Pro model.
    pub const OPENAI_GPT_5_PRO: &str = "gpt-5-pro";
    /// Constant for the O3 model.
    pub const OPENAI_O3: &str = "o3";
    /// Constant for the O4 Mini model.
    pub const OPENAI_O4_MINI: &str = "o4-mini";

    // Google Gemini models (fully compatible via official compatibility layer)
    /// Constant for the Gemini 2.5 Pro model.
    pub const GOOGLE_GEMINI_2_5_PRO: &str = "gemini-2.5-pro";
    /// Constant for the Gemini 2.5 Flash model.
    pub const GOOGLE_GEMINI_2_5_FLASH: &str = "gemini-2.5-flash";
    /// Constant for the Gemini 2.5 Flash Lite model.
    pub const GOOGLE_GEMINI_2_5_FLASH_LITE: &str = "gemini-2.5-flash-lite";

    // Gemini 3 series - Advanced reasoning models with thinking capabilities
    /// Constant for the Gemini 3 Pro model with advanced reasoning.
    pub const GOOGLE_GEMINI_3_PRO: &str = "gemini-3-pro-preview";
    /// Constant for the Gemini 3 Flash model with thinking capabilities.
    pub const GOOGLE_GEMINI_3_FLASH: &str = "gemini-3-flash-preview";
    /// Constant for the Gemini 3 Pro Image model with multimodal reasoning.
    pub const GOOGLE_GEMINI_3_PRO_IMAGE: &str = "gemini-3-pro-image-preview";

    // Groq models (fully compatible)
    /// Constant for the Llama 3.1 8B Instant model.
    pub const GROQ_LLAMA_3_1_8B_INSTANT: &str = "llama-3.1-8b-instant";
    /// Constant for the Llama 3.3 70B Versatile model.
    pub const GROQ_LLAMA_3_3_70B_VERSATILE: &str = "llama-3.3-70b-versatile";
    /// Constant for the Moonshot AI Kimi K2 Instruct (0905) model.
    pub const KIMI_K2_0925: &str = "moonshotai/kimi-k2-instruct-0905";

    // Cerebras models (fully compatible)
    /// Constant for the Llama3.1 8B model.
    pub const CEREBRAS_LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";

    // Enosis Labs models (fully compatible)
    /// Constant for the Astronomer 1 model.
    pub const ASTRONOMER_1: &str = "astronomer-1";
    /// Constant for the Astronomer 1 Max model.
    pub const ASTRONOMER_1_MAX: &str = "astronomer-1-max";
    /// Constant for the Astronomer 1.5 model.
    pub const ASTRONOMER_1_5: &str = "astronomer-1.5";
    /// Constant for the Astronomer 2 model.
    pub const ASTRONOMER_2: &str = "astronomer-2";
    /// Constant for the Astronomer 2 Pro model.
    pub const ASTRONOMER_2_PRO: &str = "astronomer-2-pro";

    // Legacy aliases for backward compatibility (deprecated - use provider-prefixed versions above)
    /// Legacy constant for the GPT-4o model (use OPENAI_GPT_4O instead).
    #[deprecated(note = "Use OPENAI_GPT_4O instead for OpenAI compatibility")]
    pub const GPT_4O: &str = "openai/gpt-4o";
    /// Legacy constant for the GPT-5 model (use OPENAI_GPT_5 instead).
    #[deprecated(note = "Use OPENAI_GPT_5 instead for OpenAI compatibility")]
    pub const GPT_5: &str = "openai/gpt-5";
    /// Legacy constant for the Gemini 2.5 Pro model (use GOOGLE_GEMINI_2_5_PRO instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_PRO instead for OpenAI compatibility")]
    pub const GEMINI_2_5_PRO: &str = "google/gemini-2.5-pro";
    /// Legacy constant for the Gemini 2.5 Flash model (use GOOGLE_GEMINI_2_5_FLASH instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH: &str = "google/gemini-2.5-flash";
    /// Legacy constant for the Gemini 2.5 Flash Lite model (use GOOGLE_GEMINI_2_5_FLASH_LITE instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH_LITE instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH_LITE: &str = "google/gemini-2.5-flash-lite";
    /// Legacy constant for the Llama 3.1 8B Instant model (use GROQ_LLAMA_3_1_8B_INSTANT instead).
    #[deprecated(note = "Use GROQ_LLAMA_3_1_8B_INSTANT instead for OpenAI compatibility")]
    pub const LLAMA_3_1_8B_INSTANT: &str = "groq/llama-3.1-8b-instant";
    /// Legacy constant for the Llama3.1 8B model (use CEREBRAS_LLAMA3_1_8B instead).
    #[deprecated(note = "Use CEREBRAS_LLAMA3_1_8B instead for OpenAI compatibility")]
    pub const LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";
}

/// A collection of predefined provider name constants for convenience.
pub mod providers {
    /// Constant for the OpenAI provider.
    pub const OPENAI: &str = "openai";
    /// Constant for the Anthropic provider.
    pub const ANTHROPIC: &str = "anthropic";
    /// Constant for the Groq provider.
    pub const GROQ: &str = "groq";
    /// Constant for the Cerebras provider.
    pub const CEREBRAS: &str = "cerebras";
    /// Constant for the Gemini provider.
    pub const GEMINI: &str = "gemini";
    /// Constant for the Enosis Labs provider.
    pub const ENOSISLABS: &str = "enosislabs";
}

impl ChatCompletionRequest {
    /// Creates a new `ChatCompletionRequest` with the given model and messages.
    ///
    /// # Arguments
    ///
    /// * `model` - The identifier of the model to use.
    /// * `messages` - The list of messages for the conversation.
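    ///
    /// # Example
    ///
    /// A minimal sketch of building a request with the fluent setters below
    /// (the `rainy_sdk::models` path is assumed):
    ///
    /// ```rust
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new(
    ///     "gpt-4o",
    ///     vec![ChatMessage::user("Hello!")],
    /// )
    /// .with_temperature(0.7)
    /// .with_max_tokens(256);
    ///
    /// assert_eq!(request.temperature, Some(0.7));
    /// ```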
    pub fn new(model: impl Into<String>, messages: Vec<ChatMessage>) -> Self {
        Self {
            model: model.into(),
            messages,
            temperature: None,
            max_tokens: None,
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
            stop: None,
            user: None,
            provider: None,
            stream: None,
            logit_bias: None,
            logprobs: None,
            top_logprobs: None,
            n: None,
            response_format: None,
            tools: None,
            tool_choice: None,
            thinking_config: None,
        }
    }

    /// Sets the temperature for the chat completion.
    ///
    /// The temperature is clamped between 0.0 and 2.0.
    ///
    /// # Arguments
    ///
    /// * `temperature` - The sampling temperature.
    pub fn with_temperature(mut self, temperature: f32) -> Self {
        self.temperature = Some(temperature.clamp(0.0, 2.0));
        self
    }

    /// Sets the maximum number of tokens to generate.
    ///
    /// # Arguments
    ///
    /// * `max_tokens` - The maximum number of tokens.
    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
        self.max_tokens = Some(max_tokens);
        self
    }

    /// Sets the user identifier for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `user` - A unique identifier for the end-user.
    pub fn with_user(mut self, user: impl Into<String>) -> Self {
        self.user = Some(user.into());
        self
    }

    /// Sets a provider hint for the request.
    ///
    /// # Arguments
    ///
    /// * `provider` - The name of the provider to use.
    pub fn with_provider(mut self, provider: impl Into<String>) -> Self {
        self.provider = Some(provider.into());
        self
    }

    /// Enables or disables streaming for the response.
    ///
    /// # Arguments
    ///
    /// * `stream` - `true` to enable streaming, `false` to disable.
    pub fn with_stream(mut self, stream: bool) -> Self {
        self.stream = Some(stream);
        self
    }

    /// Sets the logit bias for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `logit_bias` - A map of token IDs to bias values.
    pub fn with_logit_bias(mut self, logit_bias: serde_json::Value) -> Self {
        self.logit_bias = Some(logit_bias);
        self
    }

    /// Enables or disables log probabilities for the response.
    ///
    /// # Arguments
    ///
    /// * `logprobs` - `true` to include log probabilities.
    pub fn with_logprobs(mut self, logprobs: bool) -> Self {
        self.logprobs = Some(logprobs);
        self
    }

    /// Sets the number of most likely tokens to return at each position.
    ///
    /// # Arguments
    ///
    /// * `top_logprobs` - The number of top log probabilities to return.
    pub fn with_top_logprobs(mut self, top_logprobs: u32) -> Self {
        self.top_logprobs = Some(top_logprobs);
        self
    }

    /// Sets the number of chat completion choices to generate.
    ///
    /// # Arguments
    ///
    /// * `n` - The number of completions to generate.
    pub fn with_n(mut self, n: u32) -> Self {
        self.n = Some(n);
        self
    }

    /// Sets the response format for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `response_format` - The format the model must output.
    pub fn with_response_format(mut self, response_format: ResponseFormat) -> Self {
        self.response_format = Some(response_format);
        self
    }

    /// Sets the tools available to the model.
    ///
    /// # Arguments
    ///
    /// * `tools` - A list of tools the model can use.
    pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
        self.tools = Some(tools);
        self
    }

    /// Sets the tool choice for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `tool_choice` - Controls which tool the model uses.
    pub fn with_tool_choice(mut self, tool_choice: ToolChoice) -> Self {
        self.tool_choice = Some(tool_choice);
        self
    }

    /// Sets the thinking configuration for Gemini 3 and 2.5 series models.
    ///
    /// # Arguments
    ///
    /// * `thinking_config` - Configuration for thinking capabilities.
    pub fn with_thinking_config(mut self, thinking_config: ThinkingConfig) -> Self {
        self.thinking_config = Some(thinking_config);
        self
    }

    /// Enables thought summaries in the response (Gemini 3 and 2.5 series).
    ///
    /// # Arguments
    ///
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn with_include_thoughts(mut self, include_thoughts: bool) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.include_thoughts = Some(include_thoughts);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking level for Gemini 3 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_level` - The thinking level (minimal, low, medium, high).
    pub fn with_thinking_level(mut self, thinking_level: ThinkingLevel) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_level = Some(thinking_level);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking budget for Gemini 2.5 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_budget` - Number of thinking tokens (-1 for dynamic, 0 to disable).
    pub fn with_thinking_budget(mut self, thinking_budget: i32) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_budget = Some(thinking_budget);
        self.thinking_config = Some(config);
        self
    }

    /// Validates that the request parameters are compatible with OpenAI standards.
    ///
    /// This method checks parameter ranges and values to ensure they match OpenAI's API specifications.
    /// It also validates Gemini-specific thinking parameters (`thinking_level`, `thinking_budget`).
    ///
    /// # Returns
    ///
    /// A `Result` indicating whether the request is valid for OpenAI compatibility.
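    ///
    /// # Example
    ///
    /// A sketch of catching an out-of-range parameter (`top_p` is set via struct
    /// update syntax because no builder for it exists in this module):
    ///
    /// ```rust
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let ok = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("Hi")]);
    /// assert!(ok.validate_openai_compatibility().is_ok());
    ///
    /// let bad = ChatCompletionRequest { top_p: Some(1.5), ..ok };
    /// assert!(bad.validate_openai_compatibility().is_err());
    /// ```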
    pub fn validate_openai_compatibility(&self) -> Result<(), String> {
        // Validate temperature
        if let Some(temp) = self.temperature {
            if !(0.0..=2.0).contains(&temp) {
                return Err(format!(
                    "Temperature must be between 0.0 and 2.0, got {}",
                    temp
                ));
            }
        }

        // Validate top_p
        if let Some(top_p) = self.top_p {
            if !(0.0..=1.0).contains(&top_p) {
                return Err(format!("Top-p must be between 0.0 and 1.0, got {}", top_p));
            }
        }

        // Validate frequency_penalty
        if let Some(fp) = self.frequency_penalty {
            if !(-2.0..=2.0).contains(&fp) {
                return Err(format!(
                    "Frequency penalty must be between -2.0 and 2.0, got {}",
                    fp
                ));
            }
        }

        // Validate presence_penalty
        if let Some(pp) = self.presence_penalty {
            if !(-2.0..=2.0).contains(&pp) {
                return Err(format!(
                    "Presence penalty must be between -2.0 and 2.0, got {}",
                    pp
                ));
            }
        }

        // Validate max_tokens
        if let Some(mt) = self.max_tokens {
            if mt == 0 {
                return Err("Max tokens must be greater than 0".to_string());
            }
        }

        // Validate top_logprobs
        if let Some(tlp) = self.top_logprobs {
            if !(0..=20).contains(&tlp) {
                return Err(format!(
                    "Top logprobs must be between 0 and 20, got {}",
                    tlp
                ));
            }
        }

        // Validate n
        if let Some(n) = self.n {
            if n == 0 {
                return Err("n must be greater than 0".to_string());
            }
        }

        // Validate stop sequences
        if let Some(stop) = &self.stop {
            if stop.len() > 4 {
                return Err("Cannot have more than 4 stop sequences".to_string());
            }
            for seq in stop {
                if seq.is_empty() {
                    return Err("Stop sequences cannot be empty".to_string());
                }
                if seq.len() > 64 {
                    return Err("Stop sequences cannot be longer than 64 characters".to_string());
                }
            }
        }

        // Validate thinking configuration for Gemini models
        if let Some(thinking_config) = &self.thinking_config {
            self.validate_thinking_config(thinking_config)?;
        }

        Ok(())
    }

    /// Validates thinking configuration parameters for Gemini models.
    fn validate_thinking_config(&self, config: &ThinkingConfig) -> Result<(), String> {
        let is_gemini_3 = self.model.contains("gemini-3");
        let is_gemini_2_5 = self.model.contains("gemini-2.5");
        let is_gemini_3_pro = self.model.contains("gemini-3-pro");

        // Validate thinking level (Gemini 3 only)
        if let Some(level) = &config.thinking_level {
            if !is_gemini_3 {
                return Err("thinking_level is only supported for Gemini 3 models".to_string());
            }

            match level {
                ThinkingLevel::Minimal | ThinkingLevel::Medium => {
                    if is_gemini_3_pro {
                        return Err(
                            "Gemini 3 Pro only supports 'low' and 'high' thinking levels"
                                .to_string(),
                        );
                    }
                }
                _ => {}
            }
        }

        // Validate thinking budget (Gemini 2.5 only)
        if let Some(budget) = config.thinking_budget {
            if !is_gemini_2_5 {
                return Err("thinking_budget is only supported for Gemini 2.5 models".to_string());
            }

            // Validate budget ranges based on model
            if self.model.contains("2.5-pro") {
                if budget != -1 && !(128..=32768).contains(&budget) {
                    return Err(
                        "Gemini 2.5 Pro thinking budget must be -1 (dynamic) or between 128-32768"
                            .to_string(),
                    );
                }
            } else if self.model.contains("2.5-flash")
                && budget != -1
                && !(0..=24576).contains(&budget)
            {
                return Err(
                    "Gemini 2.5 Flash thinking budget must be -1 (dynamic) or between 0-24576"
                        .to_string(),
                );
            }
        }

        // Reject conflicting parameters (these target different model families)
        if config.thinking_level.is_some() && config.thinking_budget.is_some() {
            return Err("Cannot specify both thinking_level (Gemini 3) and thinking_budget (Gemini 2.5) in the same request".to_string());
        }

        Ok(())
    }

    /// Checks if the model supports thinking capabilities.
    pub fn supports_thinking(&self) -> bool {
        self.model.contains("gemini-3") || self.model.contains("gemini-2.5")
    }

    /// Checks if the model requires thought signatures for function calling.
    pub fn requires_thought_signatures(&self) -> bool {
        self.model.contains("gemini-3")
    }
}

impl ChatMessage {
    /// Creates a new message with the `System` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the system message.
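    ///
    /// # Example
    ///
    /// A minimal sketch (the `rainy_sdk::models` path is assumed):
    ///
    /// ```rust
    /// use rainy_sdk::models::{ChatMessage, MessageRole};
    ///
    /// let msg = ChatMessage::system("You are a helpful assistant.");
    /// assert_eq!(msg.role, MessageRole::System);
    /// ```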
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            content: content.into(),
        }
    }

    /// Creates a new message with the `User` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the user message.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            content: content.into(),
        }
    }

    /// Creates a new message with the `Assistant` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the assistant message.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            content: content.into(),
        }
    }
}

// Legacy compatibility types - keep existing types for backward compatibility
use uuid::Uuid;

/// Represents a user account (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct User {
    /// The unique ID of the user.
    pub id: Uuid,
    /// The user's identifier string.
    pub user_id: String,
    /// The name of the user's subscription plan.
    pub plan_name: String,
    /// The user's current credit balance.
    pub current_credits: f64,
    /// The amount of credits the user has used in the current month.
    pub credits_used_this_month: f64,
    /// The date when the user's credits will reset.
    pub credits_reset_date: DateTime<Utc>,
    /// Indicates if the user account is active.
    pub is_active: bool,
    /// The timestamp of when the user account was created.
    pub created_at: DateTime<Utc>,
}

/// Represents an API key (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKey {
    /// The unique ID of the API key.
    pub id: Uuid,
    /// The API key string.
    pub key: String,
    /// The ID of the user who owns the key.
    pub owner_id: Uuid,
    /// Indicates if the API key is active.
    pub is_active: bool,
    /// The timestamp of when the key was created.
    pub created_at: DateTime<Utc>,
    /// The expiration date of the key, if any.
    pub expires_at: Option<DateTime<Utc>>,
    /// A description of the key.
    pub description: Option<String>,
    /// The timestamp of when the key was last used.
    pub last_used_at: Option<DateTime<Utc>>,
}

/// Represents usage statistics over a period (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageStats {
    /// The number of days in the usage period.
    pub period_days: u32,
    /// A list of daily usage data.
    pub daily_usage: Vec<DailyUsage>,
    /// A list of recent credit transactions.
    pub recent_transactions: Vec<CreditTransaction>,
    /// The total number of requests made in the period.
    pub total_requests: u64,
    /// The total number of tokens used in the period.
    pub total_tokens: u64,
}

/// Represents usage data for a single day (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DailyUsage {
    /// The date for the usage data.
    pub date: String,
    /// The number of credits used on this day.
    pub credits_used: f64,
    /// The number of requests made on this day.
    pub requests: u64,
    /// The number of tokens used on this day.
    pub tokens: u64,
}

/// Represents a single credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditTransaction {
    /// The unique ID of the transaction.
    pub id: Uuid,
    /// The type of the transaction.
    pub transaction_type: TransactionType,
    /// The amount of credits involved in the transaction.
    pub credits_amount: f64,
    /// The credit balance after the transaction.
    pub credits_balance_after: f64,
    /// The provider associated with the transaction, if any.
    pub provider: Option<String>,
    /// The model associated with the transaction, if any.
    pub model: Option<String>,
    /// A description of the transaction.
    pub description: String,
    /// The timestamp of when the transaction occurred.
    pub created_at: DateTime<Utc>,
}

/// The type of credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TransactionType {
    /// A transaction for API usage.
    Usage,
    /// A transaction for a credit reset.
    Reset,
    /// A transaction for a credit purchase.
    Purchase,
    /// A transaction for a credit refund.
    Refund,
}

// Legacy aliases for backward compatibility
/// A legacy type alias for `MessageRole`.
pub type ChatRole = MessageRole;
/// A legacy type alias for `Usage`.
pub type ChatUsage = Usage;
/// A legacy type alias for `HealthStatus`.
pub type HealthCheck = HealthStatus;

/// Represents the status of backend services (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthServices {
    /// The status of the database connection.
    pub database: bool,
    /// The status of the Redis connection.
    pub redis: bool,
    /// The overall status of AI providers.
    pub providers: bool,
}

/// The health status of the API (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatusEnum {
    /// The API is healthy.
    Healthy,
    /// The API is in a degraded state.
    Degraded,
    /// The API is unhealthy.
    Unhealthy,
    /// The API needs initialization.
    NeedsInit,
}

/// Represents the format that the model must output.
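///
/// With the `snake_case` rename, the unit variants serialize as plain strings,
/// e.g. `"json_object"` (a sketch; `serde_json` is already used by this module):
///
/// ```rust
/// use rainy_sdk::models::ResponseFormat;
///
/// let json = serde_json::to_string(&ResponseFormat::JsonObject).unwrap();
/// assert_eq!(json, "\"json_object\"");
/// ```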
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResponseFormat {
    /// The model can return text.
    Text,
    /// The model must return a valid JSON object.
    JsonObject,
    /// The model must return a JSON object that matches the provided schema.
    JsonSchema {
        /// The JSON Schema that the model's output must conform to.
        json_schema: serde_json::Value,
    },
}

/// Represents a tool that the model can use.
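///
/// # Example
///
/// A sketch of a weather-lookup tool definition (the function name and schema
/// are illustrative):
///
/// ```rust
/// use rainy_sdk::models::{FunctionDefinition, Tool, ToolType};
/// use serde_json::json;
///
/// let tool = Tool {
///     r#type: ToolType::Function,
///     function: FunctionDefinition {
///         name: "get_weather".into(),
///         description: Some("Look up the current weather for a city.".into()),
///         parameters: Some(json!({
///             "type": "object",
///             "properties": { "city": { "type": "string" } },
///             "required": ["city"]
///         })),
///     },
/// };
/// ```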
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
    /// The type of the tool (currently only "function" is supported).
    pub r#type: ToolType,
    /// The function definition describing the tool's capabilities.
    pub function: FunctionDefinition,
}

/// The type of tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolType {
    /// A function tool.
    Function,
}

/// Represents a function definition for a tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDefinition {
    /// The name of the function.
    pub name: String,
    /// A description of what the function does.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// The parameters the function accepts, described as a JSON Schema object.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parameters: Option<serde_json::Value>,
}

/// Controls which tool is called by the model.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ToolChoice {
    /// No tool is called.
    None,
    /// The model chooses which tool to call.
    Auto,
    /// A specific tool is called.
    ///
    /// Note: a container-level `#[serde(untagged)]` would serialize the unit
    /// variants as `null`; marking only this variant untagged (serde >= 1.0.181)
    /// keeps `None`/`Auto` as the lowercase strings "none"/"auto" while the
    /// tool object serializes as-is.
    #[serde(untagged)]
    Tool {
        /// The type of the tool being called.
        r#type: ToolType,
        /// The function to call within the tool.
        function: ToolFunction,
    },
}

/// Represents a tool function call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolFunction {
    /// The name of the function to call.
    pub name: String,
}

/// Configuration for thinking capabilities in Gemini 3 and 2.5 series models.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ThinkingConfig {
    /// Whether to include thought summaries in the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level for Gemini 3 models (low, high for Pro; minimal, low, medium, high for Flash).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,

    /// The thinking budget for Gemini 2.5 models (number of thinking tokens).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,
}

/// Thinking levels for Gemini 3 models.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ThinkingLevel {
    /// Minimal thinking (Gemini 3 Flash only) - model likely won't think.
    Minimal,
    /// Low thinking level - faster responses with basic reasoning.
    Low,
    /// Medium thinking level (Gemini 3 Flash only) - balanced reasoning and speed.
    Medium,
    /// High thinking level - deep reasoning for complex tasks (default).
    High,
}

/// Represents a content part that may include thought signatures.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ContentPart {
    /// The text content of the part.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,

    /// Function call information if this part contains a function call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_call: Option<FunctionCall>,

    /// Function response information if this part contains a function response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_response: Option<FunctionResponse>,

    /// Indicates if this part contains thought content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<bool>,

    /// Encrypted thought signature for preserving reasoning context across turns.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought_signature: Option<String>,
}

/// Represents a function call in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionCall {
    /// The name of the function being called.
    pub name: String,
    /// The arguments for the function call as a JSON object.
    pub args: serde_json::Value,
}

/// Represents a function response in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionResponse {
    /// The name of the function that was called.
    pub name: String,
    /// The response from the function call.
    pub response: serde_json::Value,
}

/// Enhanced chat message that supports Gemini 3 thinking capabilities.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnhancedChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content parts of the message (supports text, function calls, and thought signatures).
    pub parts: Vec<ContentPart>,
}

/// Enhanced usage statistics that include thinking tokens.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedUsage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,
    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
    /// The number of thinking tokens used (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<u32>,
}

impl ThinkingConfig {
    /// Creates a new thinking configuration with default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a configuration for Gemini 3 models with specified thinking level.
    ///
    /// # Arguments
    ///
    /// * `level` - The thinking level to use.
    /// * `include_thoughts` - Whether to include thought summaries.
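    ///
    /// # Example
    ///
    /// A minimal sketch (the `rainy_sdk::models` path is assumed):
    ///
    /// ```rust
    /// use rainy_sdk::models::{ThinkingConfig, ThinkingLevel};
    ///
    /// let config = ThinkingConfig::gemini_3(ThinkingLevel::High, true);
    /// assert_eq!(config.include_thoughts, Some(true));
    /// assert!(config.thinking_budget.is_none());
    /// ```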
    pub fn gemini_3(level: ThinkingLevel, include_thoughts: bool) -> Self {
        Self {
            thinking_level: Some(level),
            include_thoughts: Some(include_thoughts),
            thinking_budget: None,
        }
    }

    /// Creates a configuration for Gemini 2.5 models with specified thinking budget.
    ///
    /// # Arguments
    ///
    /// * `budget` - The thinking budget (-1 for dynamic, 0 to disable, or specific token count).
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn gemini_2_5(budget: i32, include_thoughts: bool) -> Self {
        Self {
            thinking_budget: Some(budget),
            include_thoughts: Some(include_thoughts),
            thinking_level: None,
        }
    }

    /// Creates a configuration optimized for complex reasoning tasks.
    ///
    /// Note: this sets both `thinking_level` (Gemini 3) and `thinking_budget`
    /// (Gemini 2.5) so it applies to either family, but
    /// `validate_openai_compatibility` rejects requests that carry both;
    /// prefer `gemini_3` or `gemini_2_5` when validation is enabled.
    pub fn high_reasoning() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::High),
            include_thoughts: Some(true),
            thinking_budget: Some(-1), // Dynamic for 2.5 models
        }
    }

    /// Creates a configuration optimized for fast responses.
    ///
    /// Note: like `high_reasoning`, this sets both thinking fields and will be
    /// rejected by `validate_openai_compatibility`; prefer the model-specific
    /// constructors when validation is enabled.
    pub fn fast_response() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::Low),
            include_thoughts: Some(false),
            thinking_budget: Some(512), // Low budget for 2.5 models
        }
    }
}

impl ContentPart {
    /// Creates a new text content part.
    pub fn text(content: impl Into<String>) -> Self {
        Self {
            text: Some(content.into()),
            function_call: None,
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function call content part.
    pub fn function_call(name: impl Into<String>, args: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: Some(FunctionCall {
                name: name.into(),
                args,
            }),
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function response content part.
    pub fn function_response(name: impl Into<String>, response: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: None,
            function_response: Some(FunctionResponse {
                name: name.into(),
                response,
            }),
            thought: None,
            thought_signature: None,
        }
    }

    /// Adds a thought signature to this content part.
    pub fn with_thought_signature(mut self, signature: impl Into<String>) -> Self {
        self.thought_signature = Some(signature.into());
        self
    }

    /// Marks this content part as containing thought content.
    pub fn as_thought(mut self) -> Self {
        self.thought = Some(true);
        self
    }
}

impl EnhancedChatMessage {
    /// Creates a new enhanced message with the `System` role.
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `User` role.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `Assistant` role.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with multiple content parts.
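    ///
    /// # Example
    ///
    /// A sketch that pairs a function call with its thought signature (the
    /// function name, arguments, and signature value are illustrative):
    ///
    /// ```rust
    /// use rainy_sdk::models::{ContentPart, EnhancedChatMessage, MessageRole};
    /// use serde_json::json;
    ///
    /// let parts = vec![
    ///     ContentPart::text("Checking the weather."),
    ///     ContentPart::function_call("get_weather", json!({"city": "Lima"}))
    ///         .with_thought_signature("opaque-signature"),
    /// ];
    /// let msg = EnhancedChatMessage::with_parts(MessageRole::Assistant, parts);
    /// assert_eq!(msg.parts.len(), 2);
    /// ```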
    pub fn with_parts(role: MessageRole, parts: Vec<ContentPart>) -> Self {
        Self { role, parts }
    }
}

/// Represents a streaming chat completion response (OpenAI delta format).
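///
/// A sketch of deserializing one SSE `data:` payload into a chunk (the payload
/// below is illustrative):
///
/// ```rust
/// use rainy_sdk::models::ChatCompletionStreamResponse;
///
/// let data = r#"{"id":"c1","object":"chat.completion.chunk","created":0,
///     "model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hi"}}]}"#;
/// let chunk: ChatCompletionStreamResponse = serde_json::from_str(data).unwrap();
/// assert_eq!(chunk.choices[0].delta.content.as_deref(), Some("Hi"));
/// ```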
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamResponse {
    /// A unique identifier for the chat completion.
    pub id: String,
    /// The type of object, which is always "chat.completion.chunk".
    pub object: String,
    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,
    /// The model that was used for the completion.
    pub model: String,
    /// A list of chat completion choices.
    pub choices: Vec<ChatCompletionStreamChoice>,
    /// Information about the token usage for this completion (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
    /// The delta containing the new content for this choice.
    pub delta: ChatCompletionStreamDelta,
    /// The reason the model stopped generating tokens (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}

/// Represents the delta (change) in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamDelta {
    /// The role of the message (only present in the first chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    /// The new content for this chunk.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// Tool calls for this chunk (if any).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
}

/// Represents a tool call in a streaming response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// The index of the tool call.
    pub index: u32,
    /// The ID of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// The type of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#type: Option<String>,
    /// The function being called.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function: Option<ToolCallFunction>,
}

/// Represents a function call in a tool call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallFunction {
    /// The name of the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// The arguments for the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arguments: Option<String>,
}