//! Data models for the `rainy_sdk` crate: chat-completion requests and
//! responses, streaming chunks, health/usage metadata, and model/provider
//! name constants.

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4
/// Represents a single message in a chat conversation.
///
/// Use the [`ChatMessage::system`], [`ChatMessage::user`], and
/// [`ChatMessage::assistant`] constructors for the common roles.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The text content of the message.
    pub content: String,
}
13
14/// The role of a message's author.
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
16#[serde(rename_all = "lowercase")]
17pub enum MessageRole {
18    /// A message from the system, setting the context or instructions for the assistant.
19    System,
20    /// A message from the user.
21    User,
22    /// A message from the assistant.
23    Assistant,
24}
25
26/// The search provider to use for web research.
27#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
28#[serde(rename_all = "lowercase")]
29pub enum ResearchProvider {
30    /// Use Exa (formerly Metaphor) for high-quality semantic search.
31    #[default]
32    Exa,
33    /// Use Tavily for comprehensive web search and content extraction.
34    Tavily,
35    /// Automatically select the best provider based on the query.
36    Auto,
37}
38
39/// The depth of the research operation.
40#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
41#[serde(rename_all = "lowercase")]
42pub enum ResearchDepth {
43    /// Basic search (faster, lower cost).
44    #[default]
45    Basic,
46    /// Deep search (more thorough, higher cost, includes more context).
47    Advanced,
48}
49
/// Represents a request to create a chat completion.
///
/// Build one with [`ChatCompletionRequest::new`] and the `with_*` builder
/// methods; optional fields set to `None` are omitted from the serialized
/// JSON (`skip_serializing_if`), matching the OpenAI wire format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionRequest {
    /// The identifier of the model to use for the completion (e.g., "gpt-4o", "claude-sonnet-4").
    pub model: String,

    /// A list of messages that form the conversation history.
    pub messages: Vec<ChatMessage>,

    /// The sampling temperature to use, between 0.0 and 2.0. Higher values will make the output
    /// more random, while lower values will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The maximum number of tokens to generate in the completion. Must be greater than 0
    /// (enforced by `validate_openai_compatibility`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,

    /// The nucleus sampling parameter, between 0.0 and 1.0. The model considers the results of
    /// the tokens with `top_p` probability mass. So, 0.1 means only the tokens comprising the
    /// top 10% probability mass are considered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// A penalty applied to new tokens based on their frequency in the text so far,
    /// between -2.0 and 2.0. It decreases the model's likelihood to repeat the same
    /// line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// A penalty applied to new tokens based on whether they appear in the text so far,
    /// between -2.0 and 2.0. It increases the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// A list of up to 4 sequences that will cause the model to stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// A unique identifier representing your end-user, which can help in monitoring and
    /// tracking conversations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,

    /// A hint to the router about which provider to use for the model
    /// (see the `providers` module for known names).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,

    /// If set to `true`, the response will be streamed as a series of events.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// Modify the likelihood of specified tokens appearing in the completion.
    /// Kept as raw JSON because the key set (token IDs) is model-specific.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<serde_json::Value>,

    /// Whether to return log probabilities of the output tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<bool>,

    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<u32>,

    /// How many chat completion choices to generate for each input message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub n: Option<u32>,

    /// An object specifying the format that the model must output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,

    /// A list of tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,

    /// Controls which (if any) tool is called by the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,

    /// Configuration for thinking capabilities (Gemini 3 and 2.5 series).
    /// Validated against the target model by `validate_openai_compatibility`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
}
132
/// Represents the response from a chat completion request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,

    /// The type of object, which is always "chat.completion".
    pub object: String,

    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,

    /// The model that was used for the completion.
    pub model: String,

    /// A list of chat completion choices (one per requested `n`).
    pub choices: Vec<ChatChoice>,

    /// Information about the token usage for this completion.
    /// `None` when the upstream response omits it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}
155
/// Represents a chunk of a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionChunk {
    /// A unique identifier for the chat completion; shared by every chunk of
    /// the same stream.
    pub id: String,

    /// The type of object, which is always "chat.completion.chunk".
    pub object: String,

    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,

    /// The model that was used for the completion.
    pub model: String,

    /// A list of chat completion choices, each carrying a delta payload.
    pub choices: Vec<ChatCompletionChunkChoice>,
}
174
/// Represents a single choice in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionChunkChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,

    /// A delta payload with the content that has changed since the last chunk.
    pub delta: ChatCompletionChunkDelta,

    /// The reason the model stopped generating tokens.
    /// `None` until the final chunk of this choice.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}
188
189/// Represents the delta payload of a streaming chat completion chunk.
190#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct ChatCompletionChunkDelta {
192    /// The role of the message author.
193    #[serde(skip_serializing_if = "Option::is_none")]
194    pub role: Option<MessageRole>,
195
196    /// The content of the message.
197    #[serde(skip_serializing_if = "Option::is_none")]
198    pub content: Option<String>,
199
200    /// The thinking content (for Gemini 3 models).
201    #[serde(skip_serializing_if = "Option::is_none")]
202    pub thought: Option<String>,
203}
204
/// Represents a single choice in a chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,

    /// The message generated by the model.
    pub message: ChatMessage,

    /// The reason the model stopped generating tokens
    /// (e.g. "stop", "length", "tool_calls" — values follow the OpenAI format).
    pub finish_reason: String,
}
217
218/// Represents the token usage statistics for a chat completion.
219#[derive(Debug, Clone, Serialize, Deserialize)]
220pub struct Usage {
221    /// The number of tokens in the prompt.
222    pub prompt_tokens: u32,
223
224    /// The number of tokens in the generated completion.
225    pub completion_tokens: u32,
226
227    /// The total number of tokens used in the request (prompt + completion).
228    pub total_tokens: u32,
229}
230
/// Represents the health status of the Rainy API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthStatus {
    /// The overall status of the API (e.g., "healthy", "degraded").
    pub status: String,

    /// The timestamp of when the health check was performed.
    /// Kept as a string as returned by the API — format not normalized here.
    pub timestamp: String,

    /// The uptime of the system in seconds.
    pub uptime: f64,

    /// The status of individual services.
    pub services: ServiceStatus,
}
246
/// Represents the status of individual backend services.
/// `true` means the service is reachable/healthy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceStatus {
    /// The status of the database connection.
    pub database: bool,

    /// The status of the Redis connection, if applicable.
    /// `None` when the deployment does not use Redis.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub redis: Option<bool>,

    /// The overall status of the connections to AI providers.
    pub providers: bool,
}
260
/// Represents the available models and providers.
///
/// All fields carry `#[serde(default)]` so a partial or empty API payload
/// deserializes into an empty listing rather than failing.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AvailableModels {
    /// A map where keys are provider names and values are lists of model names.
    #[serde(default)]
    pub providers: HashMap<String, Vec<String>>,

    /// The total number of available models across all providers.
    #[serde(default)]
    pub total_models: usize,

    /// A list of provider names that are currently active and available.
    #[serde(default)]
    pub active_providers: Vec<String>,
}
276
/// Represents information about credit usage for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditInfo {
    /// The number of credits available before the request.
    pub current_credits: f64,

    /// The estimated number of credits that the request will cost.
    pub estimated_cost: f64,

    /// The estimated number of credits remaining after the request.
    pub credits_after_request: f64,

    /// The date when the credit balance is next scheduled to be reset.
    /// Kept as a string as returned by the API — format not normalized here.
    pub reset_date: String,
}
292
/// Represents metadata extracted from the response headers of an API request.
///
/// Every field is optional because each is populated from a separate header
/// that may be absent.
// `Default` (all `None`) and `PartialEq` are backward-compatible derives
// that simplify construction and testing; `Eq` is not possible due to the
// `f64` fields.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct RequestMetadata {
    /// The time taken for the request to complete, in milliseconds.
    pub response_time: Option<u64>,

    /// The AI provider that handled the request.
    pub provider: Option<String>,

    /// The number of tokens used in the request.
    pub tokens_used: Option<u32>,

    /// The number of credits used for the request.
    pub credits_used: Option<f64>,

    /// The number of credits remaining after the request.
    pub credits_remaining: Option<f64>,

    /// The unique ID of the request, for tracking and debugging.
    pub request_id: Option<String>,
}
314
/// A collection of predefined model constants for convenience.
/// All models listed here are confirmed to be 100% OpenAI-compatible without parameter adaptations.
pub mod model_constants {
    // OpenAI models (fully compatible)
    /// Constant for the GPT-4o model.
    pub const OPENAI_GPT_4O: &str = "gpt-4o";
    /// Constant for the GPT-5 model.
    pub const OPENAI_GPT_5: &str = "gpt-5";
    /// Constant for the GPT-5 Pro model.
    pub const OPENAI_GPT_5_PRO: &str = "gpt-5-pro";
    /// Constant for the O3 model.
    pub const OPENAI_O3: &str = "o3";
    /// Constant for the O4 Mini model.
    pub const OPENAI_O4_MINI: &str = "o4-mini";

    // Google Gemini models (fully compatible via official compatibility layer)
    /// Constant for the Gemini 2.5 Pro model.
    pub const GOOGLE_GEMINI_2_5_PRO: &str = "gemini-2.5-pro";
    /// Constant for the Gemini 2.5 Flash model.
    pub const GOOGLE_GEMINI_2_5_FLASH: &str = "gemini-2.5-flash";
    /// Constant for the Gemini 2.5 Flash Lite model.
    pub const GOOGLE_GEMINI_2_5_FLASH_LITE: &str = "gemini-2.5-flash-lite";

    // Gemini 3 series - Advanced reasoning models with thinking capabilities
    /// Constant for the Gemini 3 Pro model with advanced reasoning.
    pub const GOOGLE_GEMINI_3_PRO: &str = "gemini-3-pro-preview";
    /// Constant for the Gemini 3 Flash model with thinking capabilities.
    pub const GOOGLE_GEMINI_3_FLASH: &str = "gemini-3-flash-preview";
    /// Constant for the Gemini 3 Pro Image model with multimodal reasoning.
    pub const GOOGLE_GEMINI_3_PRO_IMAGE: &str = "gemini-3-pro-image-preview";

    // Groq models (fully compatible)
    /// Constant for the Llama 3.1 8B Instant model.
    pub const GROQ_LLAMA_3_1_8B_INSTANT: &str = "llama-3.1-8b-instant";
    /// Constant for the Llama 3.3 70B Versatile model.
    pub const GROQ_LLAMA_3_3_70B_VERSATILE: &str = "llama-3.3-70b-versatile";
    /// Constant for the Kimi K2 Instruct model.
    // NOTE(review): the constant name says 0925 but the model string says
    // 0905 — confirm which date suffix is intended before relying on the name.
    pub const KIMI_K2_0925: &str = "moonshotai/kimi-k2-instruct-0905";

    // Cerebras models (fully compatible)
    /// Constant for the Llama3.1 8B model.
    pub const CEREBRAS_LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";

    // Enosis Labs models (fully compatible)
    /// Constant for the Astronomer 1 model.
    pub const ASTRONOMER_1: &str = "astronomer-1";
    /// Constant for the Astronomer 1 Max model.
    pub const ASTRONOMER_1_MAX: &str = "astronomer-1-max";
    /// Constant for the Astronomer 1.5 model.
    pub const ASTRONOMER_1_5: &str = "astronomer-1.5";
    /// Constant for the Astronomer 2 model.
    pub const ASTRONOMER_2: &str = "astronomer-2";
    /// Constant for the Astronomer 2 Pro model.
    pub const ASTRONOMER_2_PRO: &str = "astronomer-2-pro";

    // Legacy aliases for backward compatibility (deprecated - use provider-prefixed versions above).
    // Note: the legacy values carry a "provider/" prefix, unlike their replacements.
    /// Legacy constant for the GPT-4o model (use OPENAI_GPT_4O instead).
    #[deprecated(note = "Use OPENAI_GPT_4O instead for OpenAI compatibility")]
    pub const GPT_4O: &str = "openai/gpt-4o";
    /// Legacy constant for the GPT-5 model (use OPENAI_GPT_5 instead).
    #[deprecated(note = "Use OPENAI_GPT_5 instead for OpenAI compatibility")]
    pub const GPT_5: &str = "openai/gpt-5";
    /// Legacy constant for the Gemini 2.5 Pro model (use GOOGLE_GEMINI_2_5_PRO instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_PRO instead for OpenAI compatibility")]
    pub const GEMINI_2_5_PRO: &str = "google/gemini-2.5-pro";
    /// Legacy constant for the Gemini 2.5 Flash model (use GOOGLE_GEMINI_2_5_FLASH instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH: &str = "google/gemini-2.5-flash";
    /// Legacy constant for the Gemini 2.5 Flash Lite model (use GOOGLE_GEMINI_2_5_FLASH_LITE instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH_LITE instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH_LITE: &str = "google/gemini-2.5-flash-lite";
    /// Legacy constant for the Llama 3.1 8B Instant model (use GROQ_LLAMA_3_1_8B_INSTANT instead).
    #[deprecated(note = "Use GROQ_LLAMA_3_1_8B_INSTANT instead for OpenAI compatibility")]
    pub const LLAMA_3_1_8B_INSTANT: &str = "groq/llama-3.1-8b-instant";
    /// Legacy constant for the Llama3.1 8B model (use CEREBRAS_LLAMA3_1_8B instead).
    #[deprecated(note = "Use CEREBRAS_LLAMA3_1_8B instead for OpenAI compatibility")]
    pub const LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";
}
393
/// A collection of predefined provider name constants for convenience.
/// These values are suitable for the `provider` hint on `ChatCompletionRequest`.
pub mod providers {
    /// Constant for the OpenAI provider.
    pub const OPENAI: &str = "openai";
    /// Constant for the Anthropic provider.
    pub const ANTHROPIC: &str = "anthropic";
    /// Constant for the Groq provider.
    pub const GROQ: &str = "groq";
    /// Constant for the Cerebras provider.
    pub const CEREBRAS: &str = "cerebras";
    /// Constant for the Gemini provider.
    pub const GEMINI: &str = "gemini";
    /// Constant for the Enosis Labs provider.
    pub const ENOSISLABS: &str = "enosislabs";
}
409
410impl ChatCompletionRequest {
411    /// Creates a new `ChatCompletionRequest` with the given model and messages.
412    ///
413    /// # Arguments
414    ///
415    /// * `model` - The identifier of the model to use.
416    /// * `messages` - The list of messages for the conversation.
417    pub fn new(model: impl Into<String>, messages: Vec<ChatMessage>) -> Self {
418        Self {
419            model: model.into(),
420            messages,
421            temperature: None,
422            max_tokens: None,
423            top_p: None,
424            frequency_penalty: None,
425            presence_penalty: None,
426            stop: None,
427            user: None,
428            provider: None,
429            stream: None,
430            logit_bias: None,
431            logprobs: None,
432            top_logprobs: None,
433            n: None,
434            response_format: None,
435            tools: None,
436            tool_choice: None,
437            thinking_config: None,
438        }
439    }
440
441    /// Sets the temperature for the chat completion.
442    ///
443    /// The temperature is clamped between 0.0 and 2.0.
444    ///
445    /// # Arguments
446    ///
447    /// * `temperature` - The sampling temperature.
448    pub fn with_temperature(mut self, temperature: f32) -> Self {
449        self.temperature = Some(temperature.clamp(0.0, 2.0));
450        self
451    }
452
453    /// Sets the maximum number of tokens to generate.
454    ///
455    /// # Arguments
456    ///
457    /// * `max_tokens` - The maximum number of tokens.
458    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
459        self.max_tokens = Some(max_tokens);
460        self
461    }
462
463    /// Sets the user identifier for the chat completion.
464    ///
465    /// # Arguments
466    ///
467    /// * `user` - A unique identifier for the end-user.
468    pub fn with_user(mut self, user: impl Into<String>) -> Self {
469        self.user = Some(user.into());
470        self
471    }
472
473    /// Sets a provider hint for the request.
474    ///
475    /// # Arguments
476    ///
477    /// * `provider` - The name of the provider to use.
478    pub fn with_provider(mut self, provider: impl Into<String>) -> Self {
479        self.provider = Some(provider.into());
480        self
481    }
482
483    /// Enables or disables streaming for the response.
484    ///
485    /// # Arguments
486    ///
487    /// * `stream` - `true` to enable streaming, `false` to disable.
488    pub fn with_stream(mut self, stream: bool) -> Self {
489        self.stream = Some(stream);
490        self
491    }
492
493    /// Sets the logit bias for the chat completion.
494    ///
495    /// # Arguments
496    ///
497    /// * `logit_bias` - A map of token IDs to bias values.
498    pub fn with_logit_bias(mut self, logit_bias: serde_json::Value) -> Self {
499        self.logit_bias = Some(logit_bias);
500        self
501    }
502
503    /// Enables or disables log probabilities for the response.
504    ///
505    /// # Arguments
506    ///
507    /// * `logprobs` - `true` to include log probabilities.
508    pub fn with_logprobs(mut self, logprobs: bool) -> Self {
509        self.logprobs = Some(logprobs);
510        self
511    }
512
513    /// Sets the number of most likely tokens to return at each position.
514    ///
515    /// # Arguments
516    ///
517    /// * `top_logprobs` - The number of top log probabilities to return.
518    pub fn with_top_logprobs(mut self, top_logprobs: u32) -> Self {
519        self.top_logprobs = Some(top_logprobs);
520        self
521    }
522
523    /// Sets the number of chat completion choices to generate.
524    ///
525    /// # Arguments
526    ///
527    /// * `n` - The number of completions to generate.
528    pub fn with_n(mut self, n: u32) -> Self {
529        self.n = Some(n);
530        self
531    }
532
533    /// Sets the response format for the chat completion.
534    ///
535    /// # Arguments
536    ///
537    /// * `response_format` - The format the model must output.
538    pub fn with_response_format(mut self, response_format: ResponseFormat) -> Self {
539        self.response_format = Some(response_format);
540        self
541    }
542
543    /// Sets the tools available to the model.
544    ///
545    /// # Arguments
546    ///
547    /// * `tools` - A list of tools the model can use.
548    pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
549        self.tools = Some(tools);
550        self
551    }
552
553    /// Sets the tool choice for the chat completion.
554    ///
555    /// # Arguments
556    ///
557    /// * `tool_choice` - Controls which tool the model uses.
558    pub fn with_tool_choice(mut self, tool_choice: ToolChoice) -> Self {
559        self.tool_choice = Some(tool_choice);
560        self
561    }
562
563    /// Sets the thinking configuration for Gemini 3 and 2.5 series models.
564    ///
565    /// # Arguments
566    ///
567    /// * `thinking_config` - Configuration for thinking capabilities.
568    pub fn with_thinking_config(mut self, thinking_config: ThinkingConfig) -> Self {
569        self.thinking_config = Some(thinking_config);
570        self
571    }
572
573    /// Enables thought summaries in the response (Gemini 3 and 2.5 series).
574    ///
575    /// # Arguments
576    ///
577    /// * `include_thoughts` - Whether to include thought summaries.
578    pub fn with_include_thoughts(mut self, include_thoughts: bool) -> Self {
579        let mut config = self.thinking_config.unwrap_or_default();
580        config.include_thoughts = Some(include_thoughts);
581        self.thinking_config = Some(config);
582        self
583    }
584
585    /// Sets the thinking level for Gemini 3 models.
586    ///
587    /// # Arguments
588    ///
589    /// * `thinking_level` - The thinking level (minimal, low, medium, high).
590    pub fn with_thinking_level(mut self, thinking_level: ThinkingLevel) -> Self {
591        let mut config = self.thinking_config.unwrap_or_default();
592        config.thinking_level = Some(thinking_level);
593        self.thinking_config = Some(config);
594        self
595    }
596
597    /// Sets the thinking budget for Gemini 2.5 models.
598    ///
599    /// # Arguments
600    ///
601    /// * `thinking_budget` - Number of thinking tokens (-1 for dynamic, 0 to disable).
602    pub fn with_thinking_budget(mut self, thinking_budget: i32) -> Self {
603        let mut config = self.thinking_config.unwrap_or_default();
604        config.thinking_budget = Some(thinking_budget);
605        self.thinking_config = Some(config);
606        self
607    }
608
609    /// Validates that the request parameters are compatible with OpenAI standards.
610    ///
611    /// This method checks parameter ranges and values to ensure they match OpenAI's API specifications.
612    /// Also validates Gemini 3 specific parameters like thinking configuration.
613    ///
614    /// # Returns
615    ///
616    /// A `Result` indicating whether the request is valid for OpenAI compatibility.
617    pub fn validate_openai_compatibility(&self) -> Result<(), String> {
618        // Validate temperature
619        if let Some(temp) = self.temperature {
620            if !(0.0..=2.0).contains(&temp) {
621                return Err(format!(
622                    "Temperature must be between 0.0 and 2.0, got {}",
623                    temp
624                ));
625            }
626        }
627
628        // Validate top_p
629        if let Some(top_p) = self.top_p {
630            if !(0.0..=1.0).contains(&top_p) {
631                return Err(format!("Top-p must be between 0.0 and 1.0, got {}", top_p));
632            }
633        }
634
635        // Validate frequency_penalty
636        if let Some(fp) = self.frequency_penalty {
637            if !(-2.0..=2.0).contains(&fp) {
638                return Err(format!(
639                    "Frequency penalty must be between -2.0 and 2.0, got {}",
640                    fp
641                ));
642            }
643        }
644
645        // Validate presence_penalty
646        if let Some(pp) = self.presence_penalty {
647            if !(-2.0..=2.0).contains(&pp) {
648                return Err(format!(
649                    "Presence penalty must be between -2.0 and 2.0, got {}",
650                    pp
651                ));
652            }
653        }
654
655        // Validate max_tokens
656        if let Some(mt) = self.max_tokens {
657            if mt == 0 {
658                return Err("Max tokens must be greater than 0".to_string());
659            }
660        }
661
662        // Validate top_logprobs
663        if let Some(tlp) = self.top_logprobs {
664            if !(0..=20).contains(&tlp) {
665                return Err(format!(
666                    "Top logprobs must be between 0 and 20, got {}",
667                    tlp
668                ));
669            }
670        }
671
672        // Validate n
673        if let Some(n) = self.n {
674            if n == 0 {
675                return Err("n must be greater than 0".to_string());
676            }
677        }
678
679        // Validate stop sequences
680        if let Some(stop) = &self.stop {
681            if stop.len() > 4 {
682                return Err("Cannot have more than 4 stop sequences".to_string());
683            }
684            for seq in stop {
685                if seq.is_empty() {
686                    return Err("Stop sequences cannot be empty".to_string());
687                }
688                if seq.len() > 64 {
689                    return Err("Stop sequences cannot be longer than 64 characters".to_string());
690                }
691            }
692        }
693
694        // Validate thinking configuration for Gemini models
695        if let Some(thinking_config) = &self.thinking_config {
696            self.validate_thinking_config(thinking_config)?;
697        }
698
699        Ok(())
700    }
701
702    /// Validates thinking configuration parameters for Gemini models.
703    fn validate_thinking_config(&self, config: &ThinkingConfig) -> Result<(), String> {
704        let is_gemini_3 = self.model.contains("gemini-3");
705        let is_gemini_2_5 = self.model.contains("gemini-2.5");
706        let is_gemini_3_pro = self.model.contains("gemini-3-pro");
707
708        // Validate thinking level (Gemini 3 only)
709        if let Some(level) = &config.thinking_level {
710            if !is_gemini_3 {
711                return Err("thinking_level is only supported for Gemini 3 models".to_string());
712            }
713
714            match level {
715                ThinkingLevel::Minimal | ThinkingLevel::Medium => {
716                    if is_gemini_3_pro {
717                        return Err(
718                            "Gemini 3 Pro only supports 'low' and 'high' thinking levels"
719                                .to_string(),
720                        );
721                    }
722                }
723                _ => {}
724            }
725        }
726
727        // Validate thinking budget (Gemini 2.5 only)
728        if let Some(budget) = config.thinking_budget {
729            if !is_gemini_2_5 {
730                return Err("thinking_budget is only supported for Gemini 2.5 models".to_string());
731            }
732
733            // Validate budget ranges based on model
734            if self.model.contains("2.5-pro") {
735                if budget != -1 && !(128..=32768).contains(&budget) {
736                    return Err(
737                        "Gemini 2.5 Pro thinking budget must be -1 (dynamic) or between 128-32768"
738                            .to_string(),
739                    );
740                }
741            } else if self.model.contains("2.5-flash")
742                && budget != -1
743                && !(0..=24576).contains(&budget)
744            {
745                return Err(
746                    "Gemini 2.5 Flash thinking budget must be -1 (dynamic) or between 0-24576"
747                        .to_string(),
748                );
749            }
750        }
751
752        // Warn about conflicting parameters
753        if config.thinking_level.is_some() && config.thinking_budget.is_some() {
754            return Err("Cannot specify both thinking_level (Gemini 3) and thinking_budget (Gemini 2.5) in the same request".to_string());
755        }
756
757        Ok(())
758    }
759
760    /// Checks if the model supports thinking capabilities.
761    pub fn supports_thinking(&self) -> bool {
762        self.model.contains("gemini-3") || self.model.contains("gemini-2.5")
763    }
764
765    /// Checks if the model requires thought signatures for function calling.
766    pub fn requires_thought_signatures(&self) -> bool {
767        self.model.contains("gemini-3")
768    }
769}
770
771impl ChatMessage {
772    /// Creates a new message with the `System` role.
773    ///
774    /// # Arguments
775    ///
776    /// * `content` - The content of the system message.
777    pub fn system(content: impl Into<String>) -> Self {
778        Self {
779            role: MessageRole::System,
780            content: content.into(),
781        }
782    }
783
784    /// Creates a new message with the `User` role.
785    ///
786    /// # Arguments
787    ///
788    /// * `content` - The content of the user message.
789    pub fn user(content: impl Into<String>) -> Self {
790        Self {
791            role: MessageRole::User,
792            content: content.into(),
793        }
794    }
795
796    /// Creates a new message with the `Assistant` role.
797    ///
798    /// # Arguments
799    ///
800    /// * `content` - The content of the assistant message.
801    pub fn assistant(content: impl Into<String>) -> Self {
802        Self {
803            role: MessageRole::Assistant,
804            content: content.into(),
805        }
806    }
807}
808
809// Legacy compatibility types - keep existing types for backward compatibility
810use uuid::Uuid;
811
/// Represents a user account (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct User {
    /// The unique ID of the user.
    pub id: Uuid,
    /// The user's identifier string.
    pub user_id: String,
    /// The name of the user's subscription plan.
    pub plan_name: String,
    /// The user's current credit balance.
    pub current_credits: f64,
    /// The amount of credits the user has used in the current month.
    pub credits_used_this_month: f64,
    /// The date when the user's credits will reset (UTC).
    pub credits_reset_date: DateTime<Utc>,
    /// Indicates if the user account is active.
    pub is_active: bool,
    /// The timestamp of when the user account was created (UTC).
    pub created_at: DateTime<Utc>,
}
832
/// Represents an API key (legacy).
///
/// Retained for backward compatibility with older API responses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKey {
    /// The unique ID of the API key.
    pub id: Uuid,
    /// The API key string.
    // NOTE(review): unclear from here whether this is the full secret or a
    // redacted prefix — treat as sensitive and avoid logging either way.
    pub key: String,
    /// The ID of the user who owns the key.
    pub owner_id: Uuid,
    /// Indicates if the API key is active.
    pub is_active: bool,
    /// The timestamp of when the key was created.
    pub created_at: DateTime<Utc>,
    /// The expiration date of the key, if any (`None` means no expiry).
    pub expires_at: Option<DateTime<Utc>>,
    /// A description of the key.
    pub description: Option<String>,
    /// The timestamp of when the key was last used (`None` if never used).
    pub last_used_at: Option<DateTime<Utc>>,
}
853
/// Represents usage statistics over a period (legacy).
///
/// Aggregates per-day usage, recent credit transactions, and period totals
/// for a rolling window of `period_days` days.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageStats {
    /// The number of days in the usage period.
    pub period_days: u32,
    /// A list of daily usage data, one entry per day in the period.
    pub daily_usage: Vec<DailyUsage>,
    /// A list of recent credit transactions.
    pub recent_transactions: Vec<CreditTransaction>,
    /// The total number of requests made in the period.
    pub total_requests: u64,
    /// The total number of tokens used in the period.
    pub total_tokens: u64,
}
868
/// Represents usage data for a single day (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DailyUsage {
    /// The date for the usage data.
    // NOTE(review): kept as a plain string rather than a chrono date —
    // presumably ISO `YYYY-MM-DD`; confirm the backend format before parsing.
    pub date: String,
    /// The number of credits used on this day.
    pub credits_used: f64,
    /// The number of requests made on this day.
    pub requests: u64,
    /// The number of tokens used on this day.
    pub tokens: u64,
}
881
/// Represents a single credit transaction (legacy).
///
/// One ledger entry in the user's credit history; `credits_balance_after`
/// records the running balance immediately after this entry was applied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditTransaction {
    /// The unique ID of the transaction.
    pub id: Uuid,
    /// The type of the transaction.
    pub transaction_type: TransactionType,
    /// The amount of credits involved in the transaction.
    pub credits_amount: f64,
    /// The credit balance after the transaction.
    pub credits_balance_after: f64,
    /// The provider associated with the transaction, if any (usage entries only).
    pub provider: Option<String>,
    /// The model associated with the transaction, if any (usage entries only).
    pub model: Option<String>,
    /// A description of the transaction.
    pub description: String,
    /// The timestamp of when the transaction occurred.
    pub created_at: DateTime<Utc>,
}
902
903/// The type of credit transaction (legacy).
904#[derive(Debug, Clone, Serialize, Deserialize)]
905#[serde(rename_all = "lowercase")]
906pub enum TransactionType {
907    /// A transaction for API usage.
908    Usage,
909    /// A transaction for a credit reset.
910    Reset,
911    /// A transaction for a credit purchase.
912    Purchase,
913    /// A transaction for a credit refund.
914    Refund,
915}
916
// Legacy aliases for backward compatibility — prefer the aliased types in new code.
/// A legacy type alias for [`MessageRole`]; prefer `MessageRole` in new code.
pub type ChatRole = MessageRole;
/// A legacy type alias for `Usage`; prefer `Usage` in new code.
pub type ChatUsage = Usage;
/// A legacy type alias for `HealthStatus`; prefer `HealthStatus` in new code.
pub type HealthCheck = HealthStatus;
924
/// Represents the status of backend services (legacy).
///
/// Each flag is a simple up/down indicator: `true` means the service
/// responded as healthy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthServices {
    /// The status of the database connection.
    pub database: bool,
    /// The status of the Redis connection.
    pub redis: bool,
    /// The overall status of AI providers.
    pub providers: bool,
}
935
/// The health status of the API (legacy).
///
/// NOTE(review): with `rename_all = "lowercase"`, `NeedsInit` serializes as
/// `"needsinit"` (no separator). Confirm the backend actually uses that exact
/// token; if it emits `"needs_init"`, this variant needs an explicit
/// `#[serde(rename = "needs_init")]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatusEnum {
    /// The API is healthy.
    Healthy,
    /// The API is in a degraded state.
    Degraded,
    /// The API is unhealthy.
    Unhealthy,
    /// The API needs initialization.
    NeedsInit,
}
949
/// Represents the format that the model must output.
///
/// Externally tagged with snake_case names: `Text` serializes as the string
/// `"text"`, `JsonObject` as `"json_object"`, and `JsonSchema` as
/// `{"json_schema": {...}}`.
/// NOTE(review): OpenAI's `response_format` expects an object such as
/// `{"type": "json_object"}` rather than a bare string — confirm the rainy
/// backend accepts this externally-tagged shape before relying on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResponseFormat {
    /// The model can return text.
    Text,
    /// The model must return a valid JSON object.
    JsonObject,
    /// The model must return a JSON object that matches the provided schema.
    JsonSchema {
        /// The JSON Schema that the model's output must conform to.
        json_schema: serde_json::Value,
    },
}
964
/// Represents a tool that the model can use.
///
/// `r#type` uses a raw identifier because `type` is a Rust keyword; it
/// serializes as the plain JSON key `"type"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
    /// The type of the tool (currently only "function" is supported).
    pub r#type: ToolType,
    /// The function definition describing the tool's capabilities.
    pub function: FunctionDefinition,
}
973
/// The type of tool.
///
/// Serialized in snake_case, so `Function` becomes `"function"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolType {
    /// A function tool (the only kind currently supported).
    Function,
}
981
/// Represents a function definition for a tool.
///
/// Optional fields are omitted from the serialized JSON entirely when `None`
/// (via `skip_serializing_if`) rather than emitted as `null`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDefinition {
    /// The name of the function.
    pub name: String,
    /// A description of what the function does.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// The parameters the function accepts, described as a JSON Schema object.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parameters: Option<serde_json::Value>,
}
994
995/// Controls which tool is called by the model.
996#[derive(Debug, Clone, Serialize, Deserialize)]
997#[serde(untagged)]
998pub enum ToolChoice {
999    /// No tool is called.
1000    None,
1001    /// The model chooses which tool to call.
1002    Auto,
1003    /// A specific tool is called.
1004    Tool {
1005        /// The type of the tool being called.
1006        r#type: ToolType,
1007        /// The function to call within the tool.
1008        function: ToolFunction,
1009    },
1010}
1011
/// Represents a tool function call.
///
/// Used inside [`ToolChoice`] to name the specific function the model
/// should invoke.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolFunction {
    /// The name of the function to call.
    pub name: String,
}
1018
/// Configuration for thinking capabilities in Gemini 3 and 2.5 series models.
///
/// `thinking_level` targets Gemini 3 models while `thinking_budget` targets
/// Gemini 2.5 models; the constructors on this type each set only the field
/// relevant to one family. All fields are omitted from the serialized JSON
/// when `None`, leaving the provider's defaults in effect.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ThinkingConfig {
    /// Whether to include thought summaries in the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level for Gemini 3 models (low, high for Pro; minimal, low, medium, high for Flash).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,

    /// The thinking budget for Gemini 2.5 models (number of thinking tokens;
    /// -1 for dynamic, 0 to disable).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,
}
1034
1035/// Thinking levels for Gemini 3 models.
1036#[derive(Debug, Clone, Serialize, Deserialize)]
1037#[serde(rename_all = "lowercase")]
1038pub enum ThinkingLevel {
1039    /// Minimal thinking (Gemini 3 Flash only) - model likely won't think.
1040    Minimal,
1041    /// Low thinking level - faster responses with basic reasoning.
1042    Low,
1043    /// Medium thinking level (Gemini 3 Flash only) - balanced reasoning and speed.
1044    Medium,
1045    /// High thinking level - deep reasoning for complex tasks (default).
1046    High,
1047}
1048
/// Represents a content part that may include thought signatures.
///
/// The constructors on this type each populate exactly one of `text`,
/// `function_call`, or `function_response`; the remaining fields stay `None`
/// and are omitted from the serialized JSON.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ContentPart {
    /// The text content of the part.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,

    /// Function call information if this part contains a function call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_call: Option<FunctionCall>,

    /// Function response information if this part contains a function response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_response: Option<FunctionResponse>,

    /// Indicates if this part contains thought content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<bool>,

    /// Encrypted thought signature for preserving reasoning context across turns.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought_signature: Option<String>,
}
1072
/// Represents a function call in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionCall {
    /// The name of the function being called.
    pub name: String,
    /// The arguments for the function call as a JSON object.
    pub args: serde_json::Value,
}
1081
/// Represents a function response in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionResponse {
    /// The name of the function that was called.
    pub name: String,
    /// The response from the function call, as an arbitrary JSON value.
    pub response: serde_json::Value,
}
1090
/// Enhanced chat message that supports Gemini 3 thinking capabilities.
///
/// Unlike [`ChatMessage`], which carries a single `String`, this message is a
/// sequence of [`ContentPart`]s so a single turn can mix text, function
/// calls, and thought signatures.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnhancedChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content parts of the message (supports text, function calls, and thought signatures).
    pub parts: Vec<ContentPart>,
}
1099
/// Enhanced usage statistics that include thinking tokens.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedUsage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,
    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
    /// The number of thinking tokens used (Gemini 3 and 2.5 series).
    // NOTE(review): whether thinking tokens are also counted inside
    // `total_tokens` is not established here — confirm against the backend.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<u32>,
}
1113
1114impl ThinkingConfig {
1115    /// Creates a new thinking configuration with default values.
1116    pub fn new() -> Self {
1117        Self::default()
1118    }
1119
1120    /// Creates a configuration for Gemini 3 models with specified thinking level.
1121    ///
1122    /// # Arguments
1123    ///
1124    /// * `level` - The thinking level to use.
1125    /// * `include_thoughts` - Whether to include thought summaries.
1126    pub fn gemini_3(level: ThinkingLevel, include_thoughts: bool) -> Self {
1127        Self {
1128            thinking_level: Some(level),
1129            include_thoughts: Some(include_thoughts),
1130            thinking_budget: None,
1131        }
1132    }
1133
1134    /// Creates a configuration for Gemini 2.5 models with specified thinking budget.
1135    ///
1136    /// # Arguments
1137    ///
1138    /// * `budget` - The thinking budget (-1 for dynamic, 0 to disable, or specific token count).
1139    /// * `include_thoughts` - Whether to include thought summaries.
1140    pub fn gemini_2_5(budget: i32, include_thoughts: bool) -> Self {
1141        Self {
1142            thinking_budget: Some(budget),
1143            include_thoughts: Some(include_thoughts),
1144            thinking_level: None,
1145        }
1146    }
1147
1148    /// Creates a configuration optimized for complex reasoning tasks.
1149    pub fn high_reasoning() -> Self {
1150        Self {
1151            thinking_level: Some(ThinkingLevel::High),
1152            include_thoughts: Some(true),
1153            thinking_budget: Some(-1), // Dynamic for 2.5 models
1154        }
1155    }
1156
1157    /// Creates a configuration optimized for fast responses.
1158    pub fn fast_response() -> Self {
1159        Self {
1160            thinking_level: Some(ThinkingLevel::Low),
1161            include_thoughts: Some(false),
1162            thinking_budget: Some(512), // Low budget for 2.5 models
1163        }
1164    }
1165}
1166
1167impl ContentPart {
1168    /// Creates a new text content part.
1169    pub fn text(content: impl Into<String>) -> Self {
1170        Self {
1171            text: Some(content.into()),
1172            function_call: None,
1173            function_response: None,
1174            thought: None,
1175            thought_signature: None,
1176        }
1177    }
1178
1179    /// Creates a new function call content part.
1180    pub fn function_call(name: impl Into<String>, args: serde_json::Value) -> Self {
1181        Self {
1182            text: None,
1183            function_call: Some(FunctionCall {
1184                name: name.into(),
1185                args,
1186            }),
1187            function_response: None,
1188            thought: None,
1189            thought_signature: None,
1190        }
1191    }
1192
1193    /// Creates a new function response content part.
1194    pub fn function_response(name: impl Into<String>, response: serde_json::Value) -> Self {
1195        Self {
1196            text: None,
1197            function_call: None,
1198            function_response: Some(FunctionResponse {
1199                name: name.into(),
1200                response,
1201            }),
1202            thought: None,
1203            thought_signature: None,
1204        }
1205    }
1206
1207    /// Adds a thought signature to this content part.
1208    pub fn with_thought_signature(mut self, signature: impl Into<String>) -> Self {
1209        self.thought_signature = Some(signature.into());
1210        self
1211    }
1212
1213    /// Marks this content part as containing thought content.
1214    pub fn as_thought(mut self) -> Self {
1215        self.thought = Some(true);
1216        self
1217    }
1218}
1219
1220impl EnhancedChatMessage {
1221    /// Creates a new enhanced message with the `System` role.
1222    pub fn system(content: impl Into<String>) -> Self {
1223        Self {
1224            role: MessageRole::System,
1225            parts: vec![ContentPart::text(content)],
1226        }
1227    }
1228
1229    /// Creates a new enhanced message with the `User` role.
1230    pub fn user(content: impl Into<String>) -> Self {
1231        Self {
1232            role: MessageRole::User,
1233            parts: vec![ContentPart::text(content)],
1234        }
1235    }
1236
1237    /// Creates a new enhanced message with the `Assistant` role.
1238    pub fn assistant(content: impl Into<String>) -> Self {
1239        Self {
1240            role: MessageRole::Assistant,
1241            parts: vec![ContentPart::text(content)],
1242        }
1243    }
1244
1245    /// Creates a new enhanced message with multiple content parts.
1246    pub fn with_parts(role: MessageRole, parts: Vec<ContentPart>) -> Self {
1247        Self { role, parts }
1248    }
1249}
1250
/// Represents a streaming chat completion response (OpenAI delta format).
///
/// One value of this type corresponds to a single SSE chunk of a streamed
/// completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamResponse {
    /// A unique identifier for the chat completion (shared by all chunks of one stream).
    pub id: String,
    /// The type of object, which is always "chat.completion.chunk".
    pub object: String,
    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,
    /// The model that was used for the completion.
    pub model: String,
    /// A list of chat completion choices.
    pub choices: Vec<ChatCompletionStreamChoice>,
    /// Information about the token usage for this completion (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}
1268
/// Represents a single choice in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
    /// The delta containing the new content for this choice.
    pub delta: ChatCompletionStreamDelta,
    /// The reason the model stopped generating tokens (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}
1280
/// Represents the delta (change) in a streaming chat completion response.
///
/// Every field is optional: each chunk carries only the pieces that changed,
/// and `None` fields are omitted from the serialized JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamDelta {
    /// The role of the message (only present in the first chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    /// The new content for this chunk.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// The thinking/reasoning content for this chunk (if any).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<String>,
    /// Tool calls for this chunk (if any).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
}
1297
/// Represents a tool call in a streaming response.
///
/// NOTE(review): in the OpenAI delta format, tool-call fragments with the
/// same `index` are presumably meant to be accumulated across chunks by the
/// consumer — confirm against the streaming handler before relying on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// The index of the tool call (stable across chunks of the same call).
    pub index: u32,
    /// The ID of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// The type of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#type: Option<String>,
    /// The function being called.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function: Option<ToolCallFunction>,
}
1313
/// Represents a function call in a tool call.
///
/// Both fields are optional because a streamed fragment may carry only part
/// of the call (e.g. a slice of the `arguments` JSON string).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallFunction {
    /// The name of the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// The arguments for the function, as a (possibly partial) JSON string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arguments: Option<String>,
}