rainy_sdk/models.rs

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Represents a single message in a chat conversation.
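///
/// # Examples
///
/// A minimal sketch using the role helpers implemented further below (the
/// `rainy_sdk::models` import path is assumed here):
///
/// ```
/// use rainy_sdk::models::{ChatMessage, MessageRole};
///
/// let msg = ChatMessage::user("Hello!");
/// assert_eq!(msg.role, MessageRole::User);
/// assert_eq!(msg.content, "Hello!");
/// ```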
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content of the message.
    pub content: String,
}

/// The role of a message's author.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum MessageRole {
    /// A message from the system, setting the context or instructions for the assistant.
    System,
    /// A message from the user.
    User,
    /// A message from the assistant.
    Assistant,
}

/// The search provider to use for web research.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "lowercase")]
pub enum ResearchProvider {
    /// Use Exa (formerly Metaphor) for high-quality semantic search.
    #[default]
    Exa,
    /// Use Tavily for comprehensive web search and content extraction.
    Tavily,
    /// Automatically select the best provider based on the query.
    Auto,
}

/// The depth of the research operation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "lowercase")]
pub enum ResearchDepth {
    /// Basic search (faster, lower cost).
    #[default]
    Basic,
    /// Deep search (more thorough, higher cost, includes more context).
    Advanced,
}

/// Represents a request to create a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionRequest {
    /// The identifier of the model to use for the completion (e.g., "gpt-4o", "claude-sonnet-4").
    pub model: String,

    /// A list of messages that form the conversation history.
    pub messages: Vec<ChatMessage>,

    /// The sampling temperature to use, between 0.0 and 2.0. Higher values will make the output
    /// more random, while lower values will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The maximum number of tokens to generate in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,

    /// The nucleus sampling parameter. The model considers the results of the tokens with `top_p`
    /// probability mass. So, 0.1 means only the tokens comprising the top 10% probability mass are considered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// A penalty applied to new tokens based on their frequency in the text so far.
    /// It decreases the model's likelihood to repeat the same line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// A penalty applied to new tokens based on whether they appear in the text so far.
    /// It increases the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// A list of sequences that will cause the model to stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// A unique identifier representing your end-user, which can help in monitoring and
    /// tracking conversations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,

    /// A hint to the router about which provider to use for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,

    /// If set to `true`, the response will be streamed as a series of events.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// Modify the likelihood of specified tokens appearing in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<serde_json::Value>,

    /// Whether to return log probabilities of the output tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<bool>,

    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<u32>,

    /// How many chat completion choices to generate for each input message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub n: Option<u32>,

    /// An object specifying the format that the model must output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,

    /// A list of tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,

    /// Controls which (if any) tool is called by the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,

    /// Configuration for thinking capabilities (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
}

/// Represents the response from a chat completion request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,

    /// The type of object, which is always "chat.completion".
    pub object: String,

    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,

    /// The model that was used for the completion.
    pub model: String,

    /// A list of chat completion choices.
    pub choices: Vec<ChatChoice>,

    /// Information about the token usage for this completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,

    /// The message generated by the model.
    pub message: ChatMessage,

    /// The reason the model stopped generating tokens.
    pub finish_reason: String,
}

/// Represents the token usage statistics for a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,

    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,

    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
}

/// Represents the health status of the Rainy API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthStatus {
    /// The overall status of the API (e.g., "healthy", "degraded").
    pub status: String,

    /// The timestamp of when the health check was performed.
    pub timestamp: String,

    /// The uptime of the system in seconds.
    pub uptime: f64,

    /// The status of individual services.
    pub services: ServiceStatus,
}

/// Represents the status of individual backend services.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceStatus {
    /// The status of the database connection.
    pub database: bool,

    /// The status of the Redis connection, if applicable.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub redis: Option<bool>,

    /// The overall status of the connections to AI providers.
    pub providers: bool,
}

/// Represents the available models and providers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AvailableModels {
    /// A map where keys are provider names and values are lists of model names.
    pub providers: HashMap<String, Vec<String>>,

    /// The total number of available models across all providers.
    pub total_models: usize,

    /// A list of provider names that are currently active and available.
    pub active_providers: Vec<String>,
}

/// Represents information about credit usage for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditInfo {
    /// The number of credits available before the request.
    pub current_credits: f64,

    /// The estimated number of credits that the request will cost.
    pub estimated_cost: f64,

    /// The estimated number of credits remaining after the request.
    pub credits_after_request: f64,

    /// The date when the credit balance is next scheduled to be reset.
    pub reset_date: String,
}

/// Represents metadata extracted from the response headers of an API request.
#[derive(Debug, Clone)]
pub struct RequestMetadata {
    /// The time taken for the request to complete, in milliseconds.
    pub response_time: Option<u64>,

    /// The AI provider that handled the request.
    pub provider: Option<String>,

    /// The number of tokens used in the request.
    pub tokens_used: Option<u32>,

    /// The number of credits used for the request.
    pub credits_used: Option<f64>,

    /// The number of credits remaining after the request.
    pub credits_remaining: Option<f64>,

    /// The unique ID of the request, for tracking and debugging.
    pub request_id: Option<String>,
}

/// A collection of predefined model constants for convenience.
/// All models listed here are confirmed to be 100% OpenAI-compatible without parameter adaptations.
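///
/// # Examples
///
/// A sketch of pairing a constant with the request builder (the
/// `rainy_sdk::models` import path is assumed):
///
/// ```
/// use rainy_sdk::models::{model_constants, ChatCompletionRequest, ChatMessage};
///
/// let request = ChatCompletionRequest::new(
///     model_constants::OPENAI_GPT_4O,
///     vec![ChatMessage::user("Hello!")],
/// );
/// assert_eq!(request.model, "gpt-4o");
/// ```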
pub mod model_constants {
    // OpenAI models (fully compatible)
    /// Constant for the GPT-4o model.
    pub const OPENAI_GPT_4O: &str = "gpt-4o";
    /// Constant for the GPT-5 model.
    pub const OPENAI_GPT_5: &str = "gpt-5";
    /// Constant for the GPT-5 Pro model.
    pub const OPENAI_GPT_5_PRO: &str = "gpt-5-pro";
    /// Constant for the O3 model.
    pub const OPENAI_O3: &str = "o3";
    /// Constant for the O4 Mini model.
    pub const OPENAI_O4_MINI: &str = "o4-mini";

    // Google Gemini models (fully compatible via official compatibility layer)
    /// Constant for the Gemini 2.5 Pro model.
    pub const GOOGLE_GEMINI_2_5_PRO: &str = "gemini-2.5-pro";
    /// Constant for the Gemini 2.5 Flash model.
    pub const GOOGLE_GEMINI_2_5_FLASH: &str = "gemini-2.5-flash";
    /// Constant for the Gemini 2.5 Flash Lite model.
    pub const GOOGLE_GEMINI_2_5_FLASH_LITE: &str = "gemini-2.5-flash-lite";

    // Gemini 3 series - Advanced reasoning models with thinking capabilities
    /// Constant for the Gemini 3 Pro model with advanced reasoning.
    pub const GOOGLE_GEMINI_3_PRO: &str = "gemini-3-pro-preview";
    /// Constant for the Gemini 3 Flash model with thinking capabilities.
    pub const GOOGLE_GEMINI_3_FLASH: &str = "gemini-3-flash-preview";
    /// Constant for the Gemini 3 Pro Image model with multimodal reasoning.
    pub const GOOGLE_GEMINI_3_PRO_IMAGE: &str = "gemini-3-pro-image-preview";

    // Groq models (fully compatible)
    /// Constant for the Llama 3.1 8B Instant model.
    pub const GROQ_LLAMA_3_1_8B_INSTANT: &str = "llama-3.1-8b-instant";
    /// Constant for the Llama 3.3 70B Versatile model.
    pub const GROQ_LLAMA_3_3_70B_VERSATILE: &str = "llama-3.3-70b-versatile";
    /// Constant for the Moonshot AI Kimi K2 Instruct (0905) model.
    pub const KIMI_K2_0925: &str = "moonshotai/kimi-k2-instruct-0905";

    // Cerebras models (fully compatible)
    /// Constant for the Llama3.1 8B model.
    pub const CEREBRAS_LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";

    // Enosis Labs models (fully compatible)
    /// Constant for the Astronomer 1 model.
    pub const ASTRONOMER_1: &str = "astronomer-1";
    /// Constant for the Astronomer 1 Max model.
    pub const ASTRONOMER_1_MAX: &str = "astronomer-1-max";
    /// Constant for the Astronomer 1.5 model.
    pub const ASTRONOMER_1_5: &str = "astronomer-1.5";
    /// Constant for the Astronomer 2 model.
    pub const ASTRONOMER_2: &str = "astronomer-2";
    /// Constant for the Astronomer 2 Pro model.
    pub const ASTRONOMER_2_PRO: &str = "astronomer-2-pro";

    // Legacy aliases for backward compatibility (deprecated - use provider-prefixed versions above)
    /// Legacy constant for the GPT-4o model (use OPENAI_GPT_4O instead).
    #[deprecated(note = "Use OPENAI_GPT_4O instead for OpenAI compatibility")]
    pub const GPT_4O: &str = "openai/gpt-4o";
    /// Legacy constant for the GPT-5 model (use OPENAI_GPT_5 instead).
    #[deprecated(note = "Use OPENAI_GPT_5 instead for OpenAI compatibility")]
    pub const GPT_5: &str = "openai/gpt-5";
    /// Legacy constant for the Gemini 2.5 Pro model (use GOOGLE_GEMINI_2_5_PRO instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_PRO instead for OpenAI compatibility")]
    pub const GEMINI_2_5_PRO: &str = "google/gemini-2.5-pro";
    /// Legacy constant for the Gemini 2.5 Flash model (use GOOGLE_GEMINI_2_5_FLASH instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH: &str = "google/gemini-2.5-flash";
    /// Legacy constant for the Gemini 2.5 Flash Lite model (use GOOGLE_GEMINI_2_5_FLASH_LITE instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH_LITE instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH_LITE: &str = "google/gemini-2.5-flash-lite";
    /// Legacy constant for the Llama 3.1 8B Instant model (use GROQ_LLAMA_3_1_8B_INSTANT instead).
    #[deprecated(note = "Use GROQ_LLAMA_3_1_8B_INSTANT instead for OpenAI compatibility")]
    pub const LLAMA_3_1_8B_INSTANT: &str = "groq/llama-3.1-8b-instant";
    /// Legacy constant for the Llama3.1 8B model (use CEREBRAS_LLAMA3_1_8B instead).
    #[deprecated(note = "Use CEREBRAS_LLAMA3_1_8B instead for OpenAI compatibility")]
    pub const LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";
}

/// A collection of predefined provider name constants for convenience.
pub mod providers {
    /// Constant for the OpenAI provider.
    pub const OPENAI: &str = "openai";
    /// Constant for the Anthropic provider.
    pub const ANTHROPIC: &str = "anthropic";
    /// Constant for the Groq provider.
    pub const GROQ: &str = "groq";
    /// Constant for the Cerebras provider.
    pub const CEREBRAS: &str = "cerebras";
    /// Constant for the Gemini provider.
    pub const GEMINI: &str = "gemini";
    /// Constant for the Enosis Labs provider.
    pub const ENOSISLABS: &str = "enosislabs";
}

impl ChatCompletionRequest {
    /// Creates a new `ChatCompletionRequest` with the given model and messages.
    ///
    /// # Arguments
    ///
    /// * `model` - The identifier of the model to use.
    /// * `messages` - The list of messages for the conversation.
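    ///
    /// # Examples
    ///
    /// A minimal sketch of the builder flow (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new(
    ///     "gpt-4o",
    ///     vec![ChatMessage::user("Summarize this text.")],
    /// )
    /// .with_max_tokens(256)
    /// .with_user("user-1234");
    ///
    /// assert_eq!(request.model, "gpt-4o");
    /// assert_eq!(request.max_tokens, Some(256));
    /// ```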
    pub fn new(model: impl Into<String>, messages: Vec<ChatMessage>) -> Self {
        Self {
            model: model.into(),
            messages,
            temperature: None,
            max_tokens: None,
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
            stop: None,
            user: None,
            provider: None,
            stream: None,
            logit_bias: None,
            logprobs: None,
            top_logprobs: None,
            n: None,
            response_format: None,
            tools: None,
            tool_choice: None,
            thinking_config: None,
        }
    }

    /// Sets the temperature for the chat completion.
    ///
    /// The temperature is clamped between 0.0 and 2.0.
    ///
    /// # Arguments
    ///
    /// * `temperature` - The sampling temperature.
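    ///
    /// # Examples
    ///
    /// Out-of-range values are clamped rather than rejected (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("Hi")])
    ///     .with_temperature(3.5);
    /// // 3.5 is clamped to the documented maximum of 2.0.
    /// assert_eq!(request.temperature, Some(2.0));
    /// ```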
    pub fn with_temperature(mut self, temperature: f32) -> Self {
        self.temperature = Some(temperature.clamp(0.0, 2.0));
        self
    }

    /// Sets the maximum number of tokens to generate.
    ///
    /// # Arguments
    ///
    /// * `max_tokens` - The maximum number of tokens.
    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
        self.max_tokens = Some(max_tokens);
        self
    }

    /// Sets the user identifier for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `user` - A unique identifier for the end-user.
    pub fn with_user(mut self, user: impl Into<String>) -> Self {
        self.user = Some(user.into());
        self
    }

    /// Sets a provider hint for the request.
    ///
    /// # Arguments
    ///
    /// * `provider` - The name of the provider to use.
    pub fn with_provider(mut self, provider: impl Into<String>) -> Self {
        self.provider = Some(provider.into());
        self
    }

    /// Enables or disables streaming for the response.
    ///
    /// # Arguments
    ///
    /// * `stream` - `true` to enable streaming, `false` to disable.
    pub fn with_stream(mut self, stream: bool) -> Self {
        self.stream = Some(stream);
        self
    }

    /// Sets the logit bias for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `logit_bias` - A map of token IDs to bias values.
    pub fn with_logit_bias(mut self, logit_bias: serde_json::Value) -> Self {
        self.logit_bias = Some(logit_bias);
        self
    }

    /// Enables or disables log probabilities for the response.
    ///
    /// # Arguments
    ///
    /// * `logprobs` - `true` to include log probabilities.
    pub fn with_logprobs(mut self, logprobs: bool) -> Self {
        self.logprobs = Some(logprobs);
        self
    }

    /// Sets the number of most likely tokens to return at each position.
    ///
    /// # Arguments
    ///
    /// * `top_logprobs` - The number of top log probabilities to return.
    pub fn with_top_logprobs(mut self, top_logprobs: u32) -> Self {
        self.top_logprobs = Some(top_logprobs);
        self
    }

    /// Sets the number of chat completion choices to generate.
    ///
    /// # Arguments
    ///
    /// * `n` - The number of completions to generate.
    pub fn with_n(mut self, n: u32) -> Self {
        self.n = Some(n);
        self
    }

    /// Sets the response format for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `response_format` - The format the model must output.
    pub fn with_response_format(mut self, response_format: ResponseFormat) -> Self {
        self.response_format = Some(response_format);
        self
    }

    /// Sets the tools available to the model.
    ///
    /// # Arguments
    ///
    /// * `tools` - A list of tools the model can use.
    pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
        self.tools = Some(tools);
        self
    }

    /// Sets the tool choice for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `tool_choice` - Controls which tool the model uses.
    pub fn with_tool_choice(mut self, tool_choice: ToolChoice) -> Self {
        self.tool_choice = Some(tool_choice);
        self
    }

    /// Sets the thinking configuration for Gemini 3 and 2.5 series models.
    ///
    /// # Arguments
    ///
    /// * `thinking_config` - Configuration for thinking capabilities.
    pub fn with_thinking_config(mut self, thinking_config: ThinkingConfig) -> Self {
        self.thinking_config = Some(thinking_config);
        self
    }

    /// Enables thought summaries in the response (Gemini 3 and 2.5 series).
    ///
    /// # Arguments
    ///
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn with_include_thoughts(mut self, include_thoughts: bool) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.include_thoughts = Some(include_thoughts);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking level for Gemini 3 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_level` - The thinking level (minimal, low, medium, high).
    pub fn with_thinking_level(mut self, thinking_level: ThinkingLevel) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_level = Some(thinking_level);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking budget for Gemini 2.5 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_budget` - Number of thinking tokens (-1 for dynamic, 0 to disable).
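    ///
    /// # Examples
    ///
    /// A sketch of enabling a fixed budget on a Gemini 2.5 model (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new(
    ///     "gemini-2.5-flash",
    ///     vec![ChatMessage::user("Plan a trip.")],
    /// )
    /// .with_thinking_budget(1024);
    ///
    /// let config = request.thinking_config.as_ref().unwrap();
    /// assert_eq!(config.thinking_budget, Some(1024));
    /// ```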
    pub fn with_thinking_budget(mut self, thinking_budget: i32) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_budget = Some(thinking_budget);
        self.thinking_config = Some(config);
        self
    }

    /// Validates that the request parameters are compatible with OpenAI standards.
    ///
    /// This method checks parameter ranges and values to ensure they match OpenAI's API specifications.
    /// It also validates Gemini-specific parameters such as the thinking configuration.
    ///
    /// # Returns
    ///
    /// A `Result` indicating whether the request is valid for OpenAI compatibility.
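    ///
    /// # Examples
    ///
    /// A sketch of catching an out-of-range parameter (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("Hi")]);
    /// assert!(request.validate_openai_compatibility().is_ok());
    ///
    /// let mut bad = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("Hi")]);
    /// bad.top_p = Some(1.5); // outside the valid 0.0..=1.0 range
    /// assert!(bad.validate_openai_compatibility().is_err());
    /// ```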
    pub fn validate_openai_compatibility(&self) -> Result<(), String> {
        // Validate temperature
        if let Some(temp) = self.temperature {
            if !(0.0..=2.0).contains(&temp) {
                return Err(format!(
                    "Temperature must be between 0.0 and 2.0, got {}",
                    temp
                ));
            }
        }

        // Validate top_p
        if let Some(top_p) = self.top_p {
            if !(0.0..=1.0).contains(&top_p) {
                return Err(format!("Top-p must be between 0.0 and 1.0, got {}", top_p));
            }
        }

        // Validate frequency_penalty
        if let Some(fp) = self.frequency_penalty {
            if !(-2.0..=2.0).contains(&fp) {
                return Err(format!(
                    "Frequency penalty must be between -2.0 and 2.0, got {}",
                    fp
                ));
            }
        }

        // Validate presence_penalty
        if let Some(pp) = self.presence_penalty {
            if !(-2.0..=2.0).contains(&pp) {
                return Err(format!(
                    "Presence penalty must be between -2.0 and 2.0, got {}",
                    pp
                ));
            }
        }

        // Validate max_tokens
        if let Some(mt) = self.max_tokens {
            if mt == 0 {
                return Err("Max tokens must be greater than 0".to_string());
            }
        }

        // Validate top_logprobs
        if let Some(tlp) = self.top_logprobs {
            if !(0..=20).contains(&tlp) {
                return Err(format!(
                    "Top logprobs must be between 0 and 20, got {}",
                    tlp
                ));
            }
        }

        // Validate n
        if let Some(n) = self.n {
            if n == 0 {
                return Err("n must be greater than 0".to_string());
            }
        }

        // Validate stop sequences
        if let Some(stop) = &self.stop {
            if stop.len() > 4 {
                return Err("Cannot have more than 4 stop sequences".to_string());
            }
            for seq in stop {
                if seq.is_empty() {
                    return Err("Stop sequences cannot be empty".to_string());
                }
                if seq.len() > 64 {
                    return Err("Stop sequences cannot be longer than 64 characters".to_string());
                }
            }
        }

        // Validate thinking configuration for Gemini models
        if let Some(thinking_config) = &self.thinking_config {
            self.validate_thinking_config(thinking_config)?;
        }

        Ok(())
    }

    /// Validates thinking configuration parameters for Gemini models.
    fn validate_thinking_config(&self, config: &ThinkingConfig) -> Result<(), String> {
        let is_gemini_3 = self.model.contains("gemini-3");
        let is_gemini_2_5 = self.model.contains("gemini-2.5");
        let is_gemini_3_pro = self.model.contains("gemini-3-pro");

        // Validate thinking level (Gemini 3 only)
        if let Some(level) = &config.thinking_level {
            if !is_gemini_3 {
                return Err("thinking_level is only supported for Gemini 3 models".to_string());
            }

            match level {
                ThinkingLevel::Minimal | ThinkingLevel::Medium => {
                    if is_gemini_3_pro {
                        return Err(
                            "Gemini 3 Pro only supports 'low' and 'high' thinking levels"
                                .to_string(),
                        );
                    }
                }
                _ => {}
            }
        }

        // Validate thinking budget (Gemini 2.5 only)
        if let Some(budget) = config.thinking_budget {
            if !is_gemini_2_5 {
                return Err("thinking_budget is only supported for Gemini 2.5 models".to_string());
            }

            // Validate budget ranges based on model
            if self.model.contains("2.5-pro") {
                if budget != -1 && !(128..=32768).contains(&budget) {
                    return Err(
                        "Gemini 2.5 Pro thinking budget must be -1 (dynamic) or between 128 and 32768"
                            .to_string(),
                    );
                }
            } else if self.model.contains("2.5-flash")
                && budget != -1
                && !(0..=24576).contains(&budget)
            {
                return Err(
                    "Gemini 2.5 Flash thinking budget must be -1 (dynamic) or between 0 and 24576"
                        .to_string(),
                );
            }
        }

        // Reject conflicting parameters: thinking_level and thinking_budget target
        // different model generations and must not be combined.
        if config.thinking_level.is_some() && config.thinking_budget.is_some() {
            return Err("Cannot specify both thinking_level (Gemini 3) and thinking_budget (Gemini 2.5) in the same request".to_string());
        }

        Ok(())
    }

    /// Checks if the model supports thinking capabilities.
    pub fn supports_thinking(&self) -> bool {
        self.model.contains("gemini-3") || self.model.contains("gemini-2.5")
    }

    /// Checks if the model requires thought signatures for function calling.
    pub fn requires_thought_signatures(&self) -> bool {
        self.model.contains("gemini-3")
    }
}

impl ChatMessage {
    /// Creates a new message with the `System` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the system message.
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            content: content.into(),
        }
    }

    /// Creates a new message with the `User` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the user message.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            content: content.into(),
        }
    }

    /// Creates a new message with the `Assistant` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the assistant message.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            content: content.into(),
        }
    }
}

// Legacy compatibility types - keep existing types for backward compatibility
use uuid::Uuid;

/// Represents a user account (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct User {
    /// The unique ID of the user.
    pub id: Uuid,
    /// The user's identifier string.
    pub user_id: String,
    /// The name of the user's subscription plan.
    pub plan_name: String,
    /// The user's current credit balance.
    pub current_credits: f64,
    /// The amount of credits the user has used in the current month.
    pub credits_used_this_month: f64,
    /// The date when the user's credits will reset.
    pub credits_reset_date: DateTime<Utc>,
    /// Indicates if the user account is active.
    pub is_active: bool,
    /// The timestamp of when the user account was created.
    pub created_at: DateTime<Utc>,
}

/// Represents an API key (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKey {
    /// The unique ID of the API key.
    pub id: Uuid,
    /// The API key string.
    pub key: String,
    /// The ID of the user who owns the key.
    pub owner_id: Uuid,
    /// Indicates if the API key is active.
    pub is_active: bool,
    /// The timestamp of when the key was created.
    pub created_at: DateTime<Utc>,
    /// The expiration date of the key, if any.
    pub expires_at: Option<DateTime<Utc>>,
    /// A description of the key.
    pub description: Option<String>,
    /// The timestamp of when the key was last used.
    pub last_used_at: Option<DateTime<Utc>>,
}

/// Represents usage statistics over a period (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageStats {
    /// The number of days in the usage period.
    pub period_days: u32,
    /// A list of daily usage data.
    pub daily_usage: Vec<DailyUsage>,
    /// A list of recent credit transactions.
    pub recent_transactions: Vec<CreditTransaction>,
    /// The total number of requests made in the period.
    pub total_requests: u64,
    /// The total number of tokens used in the period.
    pub total_tokens: u64,
}

/// Represents usage data for a single day (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DailyUsage {
    /// The date for the usage data.
    pub date: String,
    /// The number of credits used on this day.
    pub credits_used: f64,
    /// The number of requests made on this day.
    pub requests: u64,
    /// The number of tokens used on this day.
    pub tokens: u64,
}

/// Represents a single credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditTransaction {
    /// The unique ID of the transaction.
    pub id: Uuid,
    /// The type of the transaction.
    pub transaction_type: TransactionType,
    /// The amount of credits involved in the transaction.
    pub credits_amount: f64,
    /// The credit balance after the transaction.
    pub credits_balance_after: f64,
    /// The provider associated with the transaction, if any.
    pub provider: Option<String>,
    /// The model associated with the transaction, if any.
    pub model: Option<String>,
    /// A description of the transaction.
    pub description: String,
    /// The timestamp of when the transaction occurred.
    pub created_at: DateTime<Utc>,
}

/// The type of credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TransactionType {
    /// A transaction for API usage.
    Usage,
    /// A transaction for a credit reset.
    Reset,
    /// A transaction for a credit purchase.
    Purchase,
    /// A transaction for a credit refund.
    Refund,
}

// Legacy aliases for backward compatibility
/// A legacy type alias for `MessageRole`.
pub type ChatRole = MessageRole;
/// A legacy type alias for `Usage`.
pub type ChatUsage = Usage;
/// A legacy type alias for `HealthStatus`.
pub type HealthCheck = HealthStatus;

/// Represents the status of backend services (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthServices {
    /// The status of the database connection.
    pub database: bool,
    /// The status of the Redis connection.
    pub redis: bool,
    /// The overall status of AI providers.
    pub providers: bool,
}

/// The health status of the API (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatusEnum {
    /// The API is healthy.
    Healthy,
    /// The API is in a degraded state.
    Degraded,
    /// The API is unhealthy.
    Unhealthy,
    /// The API needs initialization.
    NeedsInit,
}

/// Represents the format that the model must output.
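///
/// # Examples
///
/// A sketch of requesting schema-constrained output (the import path and the
/// schema contents are illustrative):
///
/// ```
/// use rainy_sdk::models::ResponseFormat;
///
/// let format = ResponseFormat::JsonSchema {
///     json_schema: serde_json::json!({
///         "name": "answer",
///         "schema": { "type": "object" }
///     }),
/// };
/// assert!(serde_json::to_string(&format).is_ok());
/// ```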
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResponseFormat {
    /// The model can return text.
    Text,
    /// The model must return a valid JSON object.
    JsonObject,
    /// The model must return a JSON object that matches the provided schema.
    JsonSchema {
        /// The JSON Schema that the model's output must conform to.
        json_schema: serde_json::Value,
    },
}

/// Represents a tool that the model can use.
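///
/// # Examples
///
/// A sketch of a function tool definition (the import path, names, and schema
/// are illustrative):
///
/// ```
/// use rainy_sdk::models::{Tool, ToolType, FunctionDefinition};
///
/// let tool = Tool {
///     r#type: ToolType::Function,
///     function: FunctionDefinition {
///         name: "get_weather".to_string(),
///         description: Some("Look up the current weather for a city.".to_string()),
///         parameters: Some(serde_json::json!({
///             "type": "object",
///             "properties": { "city": { "type": "string" } },
///             "required": ["city"]
///         })),
///     },
/// };
/// assert_eq!(tool.function.name, "get_weather");
/// ```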
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
    /// The type of the tool (currently only "function" is supported).
    pub r#type: ToolType,
    /// The function definition describing the tool's capabilities.
    pub function: FunctionDefinition,
}

/// The type of tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolType {
    /// A function tool.
    Function,
}

/// Represents a function definition for a tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDefinition {
    /// The name of the function.
    pub name: String,
    /// A description of what the function does.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// The parameters the function accepts, described as a JSON Schema object.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parameters: Option<serde_json::Value>,
}

/// Controls which tool is called by the model.
///
/// `None` and `Auto` serialize as the plain strings "none" and "auto", while the
/// `Tool` variant serializes as an object naming the function to call, matching
/// the OpenAI `tool_choice` wire format. (The previous container-level `untagged`
/// representation collapsed both unit variants to `null`.)
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ToolChoice {
    /// No tool is called.
    None,
    /// The model chooses which tool to call.
    Auto,
    /// A specific tool is called.
    #[serde(untagged)]
    Tool {
        /// The type of the tool being called.
        r#type: ToolType,
        /// The function to call within the tool.
        function: ToolFunction,
    },
}

/// Represents a tool function call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolFunction {
    /// The name of the function to call.
    pub name: String,
}

/// Configuration for thinking capabilities in Gemini 3 and 2.5 series models.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ThinkingConfig {
    /// Whether to include thought summaries in the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level for Gemini 3 models (low, high for Pro; minimal, low, medium, high for Flash).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,

    /// The thinking budget for Gemini 2.5 models (number of thinking tokens).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,
}

/// Thinking levels for Gemini 3 models.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ThinkingLevel {
    /// Minimal thinking (Gemini 3 Flash only) - model likely won't think.
    Minimal,
    /// Low thinking level - faster responses with basic reasoning.
    Low,
    /// Medium thinking level (Gemini 3 Flash only) - balanced reasoning and speed.
    Medium,
    /// High thinking level - deep reasoning for complex tasks (default).
    High,
}

/// Represents a content part that may include thought signatures.
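///
/// # Examples
///
/// A sketch using the builder helpers implemented further below (the import
/// path and signature value are illustrative):
///
/// ```
/// use rainy_sdk::models::ContentPart;
///
/// let part = ContentPart::text("Thinking it through...")
///     .as_thought()
///     .with_thought_signature("opaque-signature");
/// assert_eq!(part.thought, Some(true));
/// ```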
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ContentPart {
    /// The text content of the part.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,

    /// Function call information if this part contains a function call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_call: Option<FunctionCall>,

    /// Function response information if this part contains a function response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_response: Option<FunctionResponse>,

    /// Indicates if this part contains thought content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<bool>,

    /// Encrypted thought signature for preserving reasoning context across turns.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought_signature: Option<String>,
}

/// Represents a function call in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionCall {
    /// The name of the function being called.
    pub name: String,
    /// The arguments for the function call as a JSON object.
    pub args: serde_json::Value,
}

/// Represents a function response in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionResponse {
    /// The name of the function that was called.
    pub name: String,
    /// The response from the function call.
    pub response: serde_json::Value,
}

/// Enhanced chat message that supports Gemini 3 thinking capabilities.
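///
/// # Examples
///
/// A sketch of a multi-part assistant message (the import path and function
/// name are illustrative):
///
/// ```
/// use rainy_sdk::models::{ContentPart, EnhancedChatMessage, MessageRole};
///
/// let message = EnhancedChatMessage::with_parts(
///     MessageRole::Assistant,
///     vec![
///         ContentPart::text("Calling a tool."),
///         ContentPart::function_call("get_weather", serde_json::json!({ "city": "Paris" })),
///     ],
/// );
/// assert_eq!(message.parts.len(), 2);
/// ```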
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnhancedChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content parts of the message (supports text, function calls, and thought signatures).
    pub parts: Vec<ContentPart>,
}

/// Enhanced usage statistics that include thinking tokens.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedUsage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,
    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
    /// The number of thinking tokens used (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<u32>,
}

impl ThinkingConfig {
    /// Creates a new thinking configuration with default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a configuration for Gemini 3 models with specified thinking level.
    ///
    /// # Arguments
    ///
    /// * `level` - The thinking level to use.
    /// * `include_thoughts` - Whether to include thought summaries.
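    ///
    /// # Examples
    ///
    /// A sketch (import path assumed):
    ///
    /// ```
    /// use rainy_sdk::models::{ThinkingConfig, ThinkingLevel};
    ///
    /// // High-effort reasoning with visible thought summaries.
    /// let config = ThinkingConfig::gemini_3(ThinkingLevel::High, true);
    /// assert_eq!(config.include_thoughts, Some(true));
    /// assert!(config.thinking_budget.is_none());
    /// ```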
    pub fn gemini_3(level: ThinkingLevel, include_thoughts: bool) -> Self {
        Self {
            thinking_level: Some(level),
            include_thoughts: Some(include_thoughts),
            thinking_budget: None,
        }
    }

    /// Creates a configuration for Gemini 2.5 models with specified thinking budget.
    ///
    /// # Arguments
    ///
    /// * `budget` - The thinking budget (-1 for dynamic, 0 to disable, or specific token count).
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn gemini_2_5(budget: i32, include_thoughts: bool) -> Self {
        Self {
            thinking_budget: Some(budget),
            include_thoughts: Some(include_thoughts),
            thinking_level: None,
        }
    }

    /// Creates a configuration optimized for complex reasoning tasks.
    ///
    /// Note: this preset fills in both `thinking_level` (used by Gemini 3) and
    /// `thinking_budget` (used by Gemini 2.5) so it can serve either family as a
    /// starting point; clear the field that does not apply to your model before
    /// calling `validate_openai_compatibility`, which rejects requests that set both.
    pub fn high_reasoning() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::High),
            include_thoughts: Some(true),
            thinking_budget: Some(-1), // Dynamic for 2.5 models
        }
    }

    /// Creates a configuration optimized for fast responses.
    ///
    /// Note: like [`ThinkingConfig::high_reasoning`], this preset sets both
    /// `thinking_level` and `thinking_budget`; keep only the field that matches
    /// your model family before validating the request.
    pub fn fast_response() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::Low),
            include_thoughts: Some(false),
            thinking_budget: Some(512), // Low budget for 2.5 models
        }
    }
}

impl ContentPart {
    /// Creates a new text content part.
    pub fn text(content: impl Into<String>) -> Self {
        Self {
            text: Some(content.into()),
            function_call: None,
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function call content part.
    pub fn function_call(name: impl Into<String>, args: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: Some(FunctionCall {
                name: name.into(),
                args,
            }),
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function response content part.
    pub fn function_response(name: impl Into<String>, response: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: None,
            function_response: Some(FunctionResponse {
                name: name.into(),
                response,
            }),
            thought: None,
            thought_signature: None,
        }
    }

    /// Adds a thought signature to this content part.
    pub fn with_thought_signature(mut self, signature: impl Into<String>) -> Self {
        self.thought_signature = Some(signature.into());
        self
    }

    /// Marks this content part as containing thought content.
    pub fn as_thought(mut self) -> Self {
        self.thought = Some(true);
        self
    }
}

impl EnhancedChatMessage {
    /// Creates a new enhanced message with the `System` role.
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `User` role.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `Assistant` role.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with multiple content parts.
    pub fn with_parts(role: MessageRole, parts: Vec<ContentPart>) -> Self {
        Self { role, parts }
    }
}

/// Represents a streaming chat completion response (OpenAI delta format).
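///
/// # Examples
///
/// A sketch of folding streamed deltas back into the full message text
/// (import path assumed):
///
/// ```
/// use rainy_sdk::models::ChatCompletionStreamResponse;
///
/// fn collect_content(chunks: &[ChatCompletionStreamResponse]) -> String {
///     chunks
///         .iter()
///         .flat_map(|chunk| chunk.choices.iter())
///         .filter_map(|choice| choice.delta.content.as_deref())
///         .collect()
/// }
///
/// assert_eq!(collect_content(&[]), "");
/// ```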
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamResponse {
    /// A unique identifier for the chat completion.
    pub id: String,
    /// The type of object, which is always "chat.completion.chunk".
    pub object: String,
    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,
    /// The model that was used for the completion.
    pub model: String,
    /// A list of chat completion choices.
    pub choices: Vec<ChatCompletionStreamChoice>,
    /// Information about the token usage for this completion (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
    /// The delta containing the new content for this choice.
    pub delta: ChatCompletionStreamDelta,
    /// The reason the model stopped generating tokens (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}

/// Represents the delta (change) in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamDelta {
    /// The role of the message (only present in the first chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    /// The new content for this chunk.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// Tool calls for this chunk (if any).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
}

/// Represents a tool call in a streaming response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// The index of the tool call.
    pub index: u32,
    /// The ID of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// The type of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#type: Option<String>,
    /// The function being called.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function: Option<ToolCallFunction>,
}

/// Represents a function call in a tool call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallFunction {
    /// The name of the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// The arguments for the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arguments: Option<String>,
}