//! rainy_sdk/models.rs — data models for the Rainy SDK (chat completions, health, credits, legacy types).
1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4
/// Represents a single message in a chat conversation.
///
/// Serializes to the OpenAI-compatible `{"role": ..., "content": ...}` shape.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ChatMessage {
    /// The role of the message author (system, user, or assistant).
    pub role: MessageRole,
    /// The text content of the message.
    pub content: String,
}
13
14/// The role of a message's author.
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
16#[serde(rename_all = "lowercase")]
17pub enum MessageRole {
18 /// A message from the system, setting the context or instructions for the assistant.
19 System,
20 /// A message from the user.
21 User,
22 /// A message from the assistant.
23 Assistant,
24}
25
26/// The search provider to use for web research.
27#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
28#[serde(rename_all = "lowercase")]
29pub enum ResearchProvider {
30 /// Use Exa (formerly Metaphor) for high-quality semantic search.
31 #[default]
32 Exa,
33 /// Use Tavily for comprehensive web search and content extraction.
34 Tavily,
35 /// Automatically select the best provider based on the query.
36 Auto,
37}
38
39/// The depth of the research operation.
40#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
41#[serde(rename_all = "lowercase")]
42pub enum ResearchDepth {
43 /// Basic search (faster, lower cost).
44 #[default]
45 Basic,
46 /// Deep search (more thorough, higher cost, includes more context).
47 Advanced,
48}
49
/// Represents a request to create a chat completion.
///
/// All optional fields use `skip_serializing_if = "Option::is_none"`, so a
/// `None` value is omitted from the serialized JSON entirely rather than sent
/// as `null` — this keeps the payload compatible with strict backends.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionRequest {
    /// The identifier of the model to use for the completion (e.g., "gpt-4o", "claude-sonnet-4").
    pub model: String,

    /// A list of messages that form the conversation history.
    pub messages: Vec<ChatMessage>,

    /// The sampling temperature to use, between 0.0 and 2.0. Higher values will make the output
    /// more random, while lower values will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The maximum number of tokens to generate in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,

    /// The nucleus sampling parameter. The model considers the results of the tokens with `top_p`
    /// probability mass. So, 0.1 means only the tokens comprising the top 10% probability mass are considered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// A penalty applied to new tokens based on their frequency in the text so far.
    /// It decreases the model's likelihood to repeat the same line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// A penalty applied to new tokens based on whether they appear in the text so far.
    /// It increases the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// A list of sequences that will cause the model to stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// A unique identifier representing your end-user, which can help in monitoring and
    /// tracking conversations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,

    /// A hint to the router about which provider to use for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,

    /// If set to `true`, the response will be streamed as a series of events.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// Modify the likelihood of specified tokens appearing in the completion.
    /// Kept as a raw `serde_json::Value` (token-id -> bias map) to stay format-agnostic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<serde_json::Value>,

    /// Whether to return log probabilities of the output tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<bool>,

    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<u32>,

    /// How many chat completion choices to generate for each input message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub n: Option<u32>,

    /// An object specifying the format that the model must output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,

    /// A list of tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,

    /// Controls which (if any) tool is called by the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,

    /// Configuration for thinking capabilities (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
}
132
/// Represents the response from a chat completion request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,

    /// The type of object, which is always "chat.completion".
    pub object: String,

    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,

    /// The model that was used for the completion.
    pub model: String,

    /// A list of chat completion choices (one entry per requested `n`).
    pub choices: Vec<ChatChoice>,

    /// Information about the token usage for this completion.
    /// Optional because some providers omit usage data on streamed responses.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}
155
/// Represents a chunk of a streaming chat completion response.
///
/// Chunks arrive as server-sent events; concatenating the `delta.content`
/// fields of successive chunks reconstructs the full message.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionChunk {
    /// A unique identifier for the chat completion.
    pub id: String,

    /// The type of object, which is always "chat.completion.chunk".
    pub object: String,

    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,

    /// The model that was used for the completion.
    pub model: String,

    /// A list of chat completion choices.
    pub choices: Vec<ChatCompletionChunkChoice>,
}
174
/// Represents a single choice in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionChunkChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,

    /// A delta payload with the content that has changed since the last chunk.
    pub delta: ChatCompletionChunkDelta,

    /// The reason the model stopped generating tokens.
    /// `None` on intermediate chunks; set only on the final chunk of a choice.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}
188
/// Represents the delta payload of a streaming chat completion chunk.
///
/// All fields are optional: the first chunk typically carries `role`,
/// subsequent chunks carry incremental `content` (and/or `thought`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionChunkDelta {
    /// The role of the message author.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<MessageRole>,

    /// The content of the message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,

    /// The thinking content (for Gemini 3 models).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<String>,
}
204
/// Represents a single choice in a chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,

    /// The message generated by the model.
    pub message: ChatMessage,

    /// The reason the model stopped generating tokens
    /// (e.g. "stop", "length", "tool_calls").
    pub finish_reason: String,
}
217
/// Represents the token usage statistics for a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,

    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,

    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
}
230
/// Represents the health status of the Rainy API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthStatus {
    /// The overall status of the API (e.g., "healthy", "degraded").
    pub status: String,

    /// The timestamp of when the health check was performed.
    /// Kept as a raw string; format is server-defined (presumably RFC 3339 — confirm).
    pub timestamp: String,

    /// The uptime of the system in seconds.
    pub uptime: f64,

    /// The status of individual services.
    pub services: ServiceStatus,
}
246
/// Represents the status of individual backend services.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceStatus {
    /// The status of the database connection (`true` = reachable).
    pub database: bool,

    /// The status of the Redis connection, if applicable.
    /// `None` when the deployment does not use Redis.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub redis: Option<bool>,

    /// The overall status of the connections to AI providers.
    pub providers: bool,
}
260
/// Represents the available models and providers.
///
/// Every field carries `#[serde(default)]`, so a partial or empty JSON payload
/// deserializes into an empty/zeroed value rather than failing.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AvailableModels {
    /// A map where keys are provider names and values are lists of model names.
    #[serde(default)]
    pub providers: HashMap<String, Vec<String>>,

    /// The total number of available models across all providers.
    #[serde(default)]
    pub total_models: usize,

    /// A list of provider names that are currently active and available.
    #[serde(default)]
    pub active_providers: Vec<String>,
}
276
/// Represents information about credit usage for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditInfo {
    /// The number of credits available before the request.
    pub current_credits: f64,

    /// The estimated number of credits that the request will cost.
    pub estimated_cost: f64,

    /// The estimated number of credits remaining after the request.
    pub credits_after_request: f64,

    /// The date when the credit balance is next scheduled to be reset.
    /// Kept as a raw string; format is server-defined.
    pub reset_date: String,
}
292
/// Represents metadata extracted from the response headers of an API request.
///
/// Every field is optional because each corresponding header may be absent
/// from a given response. `Default` yields a value with all fields `None`,
/// which is convenient when building metadata incrementally.
#[derive(Debug, Clone, Default)]
pub struct RequestMetadata {
    /// The time taken for the request to complete, in milliseconds.
    pub response_time: Option<u64>,

    /// The AI provider that handled the request.
    pub provider: Option<String>,

    /// The number of tokens used in the request.
    pub tokens_used: Option<u32>,

    /// The number of credits used for the request.
    pub credits_used: Option<f64>,

    /// The number of credits remaining after the request.
    pub credits_remaining: Option<f64>,

    /// The unique ID of the request, for tracking and debugging.
    pub request_id: Option<String>,
}
314
/// A collection of predefined model constants for convenience.
/// All models listed here are confirmed to be 100% OpenAI-compatible without parameter adaptations.
pub mod model_constants {
    // OpenAI models (fully compatible)
    /// Constant for the GPT-4o model.
    pub const OPENAI_GPT_4O: &str = "gpt-4o";
    /// Constant for the GPT-5 model.
    pub const OPENAI_GPT_5: &str = "gpt-5";
    /// Constant for the GPT-5 Pro model.
    pub const OPENAI_GPT_5_PRO: &str = "gpt-5-pro";
    /// Constant for the O3 model.
    pub const OPENAI_O3: &str = "o3";
    /// Constant for the O4 Mini model.
    pub const OPENAI_O4_MINI: &str = "o4-mini";

    // Google Gemini models (fully compatible via official compatibility layer)
    /// Constant for the Gemini 2.5 Pro model.
    pub const GOOGLE_GEMINI_2_5_PRO: &str = "gemini-2.5-pro";
    /// Constant for the Gemini 2.5 Flash model.
    pub const GOOGLE_GEMINI_2_5_FLASH: &str = "gemini-2.5-flash";
    /// Constant for the Gemini 2.5 Flash Lite model.
    pub const GOOGLE_GEMINI_2_5_FLASH_LITE: &str = "gemini-2.5-flash-lite";

    // Gemini 3 series - Advanced reasoning models with thinking capabilities
    /// Constant for the Gemini 3 Pro model with advanced reasoning.
    pub const GOOGLE_GEMINI_3_PRO: &str = "gemini-3-pro-preview";
    /// Constant for the Gemini 3 Flash model with thinking capabilities.
    pub const GOOGLE_GEMINI_3_FLASH: &str = "gemini-3-flash-preview";
    /// Constant for the Gemini 3 Pro Image model with multimodal reasoning.
    pub const GOOGLE_GEMINI_3_PRO_IMAGE: &str = "gemini-3-pro-image-preview";

    // Groq models (fully compatible)
    /// Constant for the Llama 3.1 8B Instant model.
    pub const GROQ_LLAMA_3_1_8B_INSTANT: &str = "llama-3.1-8b-instant";
    /// Constant for the Llama 3.3 70B Versatile model.
    pub const GROQ_LLAMA_3_3_70B_VERSATILE: &str = "llama-3.3-70b-versatile";
    /// Constant for the moonshotai/kimi-k2-instruct-0905 model (served via Groq).
    pub const KIMI_K2_0925: &str = "moonshotai/kimi-k2-instruct-0905";

    // Cerebras models (fully compatible)
    /// Constant for the Llama3.1 8B model.
    pub const CEREBRAS_LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";

    // Enosis Labs models (fully compatible)
    /// Constant for the Astronomer 1 model.
    pub const ASTRONOMER_1: &str = "astronomer-1";
    /// Constant for the Astronomer 1 Max model.
    pub const ASTRONOMER_1_MAX: &str = "astronomer-1-max";
    /// Constant for the Astronomer 1.5 model.
    pub const ASTRONOMER_1_5: &str = "astronomer-1.5";
    /// Constant for the Astronomer 2 model.
    pub const ASTRONOMER_2: &str = "astronomer-2";
    /// Constant for the Astronomer 2 Pro model.
    pub const ASTRONOMER_2_PRO: &str = "astronomer-2-pro";

    // Legacy aliases for backward compatibility (deprecated - use provider-prefixed versions above)
    // NOTE: the legacy values use the "provider/model" routing form, while the
    // replacements above use bare model ids — the values intentionally differ.
    /// Legacy constant for the GPT-4o model (use OPENAI_GPT_4O instead).
    #[deprecated(note = "Use OPENAI_GPT_4O instead for OpenAI compatibility")]
    pub const GPT_4O: &str = "openai/gpt-4o";
    /// Legacy constant for the GPT-5 model (use OPENAI_GPT_5 instead).
    #[deprecated(note = "Use OPENAI_GPT_5 instead for OpenAI compatibility")]
    pub const GPT_5: &str = "openai/gpt-5";
    /// Legacy constant for the Gemini 2.5 Pro model (use GOOGLE_GEMINI_2_5_PRO instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_PRO instead for OpenAI compatibility")]
    pub const GEMINI_2_5_PRO: &str = "google/gemini-2.5-pro";
    /// Legacy constant for the Gemini 2.5 Flash model (use GOOGLE_GEMINI_2_5_FLASH instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH: &str = "google/gemini-2.5-flash";
    /// Legacy constant for the Gemini 2.5 Flash Lite model (use GOOGLE_GEMINI_2_5_FLASH_LITE instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH_LITE instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH_LITE: &str = "google/gemini-2.5-flash-lite";
    /// Legacy constant for the Llama 3.1 8B Instant model (use GROQ_LLAMA_3_1_8B_INSTANT instead).
    #[deprecated(note = "Use GROQ_LLAMA_3_1_8B_INSTANT instead for OpenAI compatibility")]
    pub const LLAMA_3_1_8B_INSTANT: &str = "groq/llama-3.1-8b-instant";
    /// Legacy constant for the Llama3.1 8B model (use CEREBRAS_LLAMA3_1_8B instead).
    #[deprecated(note = "Use CEREBRAS_LLAMA3_1_8B instead for OpenAI compatibility")]
    pub const LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";
}
393
/// A collection of predefined provider name constants for convenience.
///
/// These match the lowercase provider identifiers accepted by the router
/// (see `ChatCompletionRequest::provider`).
pub mod providers {
    /// Constant for the OpenAI provider.
    pub const OPENAI: &str = "openai";
    /// Constant for the Anthropic provider.
    pub const ANTHROPIC: &str = "anthropic";
    /// Constant for the Groq provider.
    pub const GROQ: &str = "groq";
    /// Constant for the Cerebras provider.
    pub const CEREBRAS: &str = "cerebras";
    /// Constant for the Gemini provider.
    pub const GEMINI: &str = "gemini";
    /// Constant for the Enosis Labs provider.
    pub const ENOSISLABS: &str = "enosislabs";
}
409
410impl ChatCompletionRequest {
411 /// Creates a new `ChatCompletionRequest` with the given model and messages.
412 ///
413 /// # Arguments
414 ///
415 /// * `model` - The identifier of the model to use.
416 /// * `messages` - The list of messages for the conversation.
417 pub fn new(model: impl Into<String>, messages: Vec<ChatMessage>) -> Self {
418 Self {
419 model: model.into(),
420 messages,
421 temperature: None,
422 max_tokens: None,
423 top_p: None,
424 frequency_penalty: None,
425 presence_penalty: None,
426 stop: None,
427 user: None,
428 provider: None,
429 stream: None,
430 logit_bias: None,
431 logprobs: None,
432 top_logprobs: None,
433 n: None,
434 response_format: None,
435 tools: None,
436 tool_choice: None,
437 thinking_config: None,
438 }
439 }
440
441 /// Sets the temperature for the chat completion.
442 ///
443 /// The temperature is clamped between 0.0 and 2.0.
444 ///
445 /// # Arguments
446 ///
447 /// * `temperature` - The sampling temperature.
448 pub fn with_temperature(mut self, temperature: f32) -> Self {
449 self.temperature = Some(temperature.clamp(0.0, 2.0));
450 self
451 }
452
453 /// Sets the maximum number of tokens to generate.
454 ///
455 /// # Arguments
456 ///
457 /// * `max_tokens` - The maximum number of tokens.
458 pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
459 self.max_tokens = Some(max_tokens);
460 self
461 }
462
463 /// Sets the user identifier for the chat completion.
464 ///
465 /// # Arguments
466 ///
467 /// * `user` - A unique identifier for the end-user.
468 pub fn with_user(mut self, user: impl Into<String>) -> Self {
469 self.user = Some(user.into());
470 self
471 }
472
473 /// Sets a provider hint for the request.
474 ///
475 /// # Arguments
476 ///
477 /// * `provider` - The name of the provider to use.
478 pub fn with_provider(mut self, provider: impl Into<String>) -> Self {
479 self.provider = Some(provider.into());
480 self
481 }
482
483 /// Enables or disables streaming for the response.
484 ///
485 /// # Arguments
486 ///
487 /// * `stream` - `true` to enable streaming, `false` to disable.
488 pub fn with_stream(mut self, stream: bool) -> Self {
489 self.stream = Some(stream);
490 self
491 }
492
493 /// Sets the logit bias for the chat completion.
494 ///
495 /// # Arguments
496 ///
497 /// * `logit_bias` - A map of token IDs to bias values.
498 pub fn with_logit_bias(mut self, logit_bias: serde_json::Value) -> Self {
499 self.logit_bias = Some(logit_bias);
500 self
501 }
502
503 /// Enables or disables log probabilities for the response.
504 ///
505 /// # Arguments
506 ///
507 /// * `logprobs` - `true` to include log probabilities.
508 pub fn with_logprobs(mut self, logprobs: bool) -> Self {
509 self.logprobs = Some(logprobs);
510 self
511 }
512
513 /// Sets the number of most likely tokens to return at each position.
514 ///
515 /// # Arguments
516 ///
517 /// * `top_logprobs` - The number of top log probabilities to return.
518 pub fn with_top_logprobs(mut self, top_logprobs: u32) -> Self {
519 self.top_logprobs = Some(top_logprobs);
520 self
521 }
522
523 /// Sets the number of chat completion choices to generate.
524 ///
525 /// # Arguments
526 ///
527 /// * `n` - The number of completions to generate.
528 pub fn with_n(mut self, n: u32) -> Self {
529 self.n = Some(n);
530 self
531 }
532
533 /// Sets the response format for the chat completion.
534 ///
535 /// # Arguments
536 ///
537 /// * `response_format` - The format the model must output.
538 pub fn with_response_format(mut self, response_format: ResponseFormat) -> Self {
539 self.response_format = Some(response_format);
540 self
541 }
542
543 /// Sets the tools available to the model.
544 ///
545 /// # Arguments
546 ///
547 /// * `tools` - A list of tools the model can use.
548 pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
549 self.tools = Some(tools);
550 self
551 }
552
553 /// Sets the tool choice for the chat completion.
554 ///
555 /// # Arguments
556 ///
557 /// * `tool_choice` - Controls which tool the model uses.
558 pub fn with_tool_choice(mut self, tool_choice: ToolChoice) -> Self {
559 self.tool_choice = Some(tool_choice);
560 self
561 }
562
563 /// Sets the thinking configuration for Gemini 3 and 2.5 series models.
564 ///
565 /// # Arguments
566 ///
567 /// * `thinking_config` - Configuration for thinking capabilities.
568 pub fn with_thinking_config(mut self, thinking_config: ThinkingConfig) -> Self {
569 self.thinking_config = Some(thinking_config);
570 self
571 }
572
573 /// Enables thought summaries in the response (Gemini 3 and 2.5 series).
574 ///
575 /// # Arguments
576 ///
577 /// * `include_thoughts` - Whether to include thought summaries.
578 pub fn with_include_thoughts(mut self, include_thoughts: bool) -> Self {
579 let mut config = self.thinking_config.unwrap_or_default();
580 config.include_thoughts = Some(include_thoughts);
581 self.thinking_config = Some(config);
582 self
583 }
584
585 /// Sets the thinking level for Gemini 3 models.
586 ///
587 /// # Arguments
588 ///
589 /// * `thinking_level` - The thinking level (minimal, low, medium, high).
590 pub fn with_thinking_level(mut self, thinking_level: ThinkingLevel) -> Self {
591 let mut config = self.thinking_config.unwrap_or_default();
592 config.thinking_level = Some(thinking_level);
593 self.thinking_config = Some(config);
594 self
595 }
596
597 /// Sets the thinking budget for Gemini 2.5 models.
598 ///
599 /// # Arguments
600 ///
601 /// * `thinking_budget` - Number of thinking tokens (-1 for dynamic, 0 to disable).
602 pub fn with_thinking_budget(mut self, thinking_budget: i32) -> Self {
603 let mut config = self.thinking_config.unwrap_or_default();
604 config.thinking_budget = Some(thinking_budget);
605 self.thinking_config = Some(config);
606 self
607 }
608
609 /// Validates that the request parameters are compatible with OpenAI standards.
610 ///
611 /// This method checks parameter ranges and values to ensure they match OpenAI's API specifications.
612 /// Also validates Gemini 3 specific parameters like thinking configuration.
613 ///
614 /// # Returns
615 ///
616 /// A `Result` indicating whether the request is valid for OpenAI compatibility.
617 pub fn validate_openai_compatibility(&self) -> Result<(), String> {
618 // Validate temperature
619 if let Some(temp) = self.temperature {
620 if !(0.0..=2.0).contains(&temp) {
621 return Err(format!(
622 "Temperature must be between 0.0 and 2.0, got {}",
623 temp
624 ));
625 }
626 }
627
628 // Validate top_p
629 if let Some(top_p) = self.top_p {
630 if !(0.0..=1.0).contains(&top_p) {
631 return Err(format!("Top-p must be between 0.0 and 1.0, got {}", top_p));
632 }
633 }
634
635 // Validate frequency_penalty
636 if let Some(fp) = self.frequency_penalty {
637 if !(-2.0..=2.0).contains(&fp) {
638 return Err(format!(
639 "Frequency penalty must be between -2.0 and 2.0, got {}",
640 fp
641 ));
642 }
643 }
644
645 // Validate presence_penalty
646 if let Some(pp) = self.presence_penalty {
647 if !(-2.0..=2.0).contains(&pp) {
648 return Err(format!(
649 "Presence penalty must be between -2.0 and 2.0, got {}",
650 pp
651 ));
652 }
653 }
654
655 // Validate max_tokens
656 if let Some(mt) = self.max_tokens {
657 if mt == 0 {
658 return Err("Max tokens must be greater than 0".to_string());
659 }
660 }
661
662 // Validate top_logprobs
663 if let Some(tlp) = self.top_logprobs {
664 if !(0..=20).contains(&tlp) {
665 return Err(format!(
666 "Top logprobs must be between 0 and 20, got {}",
667 tlp
668 ));
669 }
670 }
671
672 // Validate n
673 if let Some(n) = self.n {
674 if n == 0 {
675 return Err("n must be greater than 0".to_string());
676 }
677 }
678
679 // Validate stop sequences
680 if let Some(stop) = &self.stop {
681 if stop.len() > 4 {
682 return Err("Cannot have more than 4 stop sequences".to_string());
683 }
684 for seq in stop {
685 if seq.is_empty() {
686 return Err("Stop sequences cannot be empty".to_string());
687 }
688 if seq.len() > 64 {
689 return Err("Stop sequences cannot be longer than 64 characters".to_string());
690 }
691 }
692 }
693
694 // Validate thinking configuration for Gemini models
695 if let Some(thinking_config) = &self.thinking_config {
696 self.validate_thinking_config(thinking_config)?;
697 }
698
699 Ok(())
700 }
701
702 /// Validates thinking configuration parameters for Gemini models.
703 fn validate_thinking_config(&self, config: &ThinkingConfig) -> Result<(), String> {
704 let is_gemini_3 = self.model.contains("gemini-3");
705 let is_gemini_2_5 = self.model.contains("gemini-2.5");
706 let is_gemini_3_pro = self.model.contains("gemini-3-pro");
707
708 // Validate thinking level (Gemini 3 only)
709 if let Some(level) = &config.thinking_level {
710 if !is_gemini_3 {
711 return Err("thinking_level is only supported for Gemini 3 models".to_string());
712 }
713
714 match level {
715 ThinkingLevel::Minimal | ThinkingLevel::Medium => {
716 if is_gemini_3_pro {
717 return Err(
718 "Gemini 3 Pro only supports 'low' and 'high' thinking levels"
719 .to_string(),
720 );
721 }
722 }
723 _ => {}
724 }
725 }
726
727 // Validate thinking budget (Gemini 2.5 only)
728 if let Some(budget) = config.thinking_budget {
729 if !is_gemini_2_5 {
730 return Err("thinking_budget is only supported for Gemini 2.5 models".to_string());
731 }
732
733 // Validate budget ranges based on model
734 if self.model.contains("2.5-pro") {
735 if budget != -1 && !(128..=32768).contains(&budget) {
736 return Err(
737 "Gemini 2.5 Pro thinking budget must be -1 (dynamic) or between 128-32768"
738 .to_string(),
739 );
740 }
741 } else if self.model.contains("2.5-flash")
742 && budget != -1
743 && !(0..=24576).contains(&budget)
744 {
745 return Err(
746 "Gemini 2.5 Flash thinking budget must be -1 (dynamic) or between 0-24576"
747 .to_string(),
748 );
749 }
750 }
751
752 // Warn about conflicting parameters
753 if config.thinking_level.is_some() && config.thinking_budget.is_some() {
754 return Err("Cannot specify both thinking_level (Gemini 3) and thinking_budget (Gemini 2.5) in the same request".to_string());
755 }
756
757 Ok(())
758 }
759
760 /// Checks if the model supports thinking capabilities.
761 pub fn supports_thinking(&self) -> bool {
762 self.model.contains("gemini-3") || self.model.contains("gemini-2.5")
763 }
764
765 /// Checks if the model requires thought signatures for function calling.
766 pub fn requires_thought_signatures(&self) -> bool {
767 self.model.contains("gemini-3")
768 }
769}
770
771impl ChatMessage {
772 /// Creates a new message with the `System` role.
773 ///
774 /// # Arguments
775 ///
776 /// * `content` - The content of the system message.
777 pub fn system(content: impl Into<String>) -> Self {
778 Self {
779 role: MessageRole::System,
780 content: content.into(),
781 }
782 }
783
784 /// Creates a new message with the `User` role.
785 ///
786 /// # Arguments
787 ///
788 /// * `content` - The content of the user message.
789 pub fn user(content: impl Into<String>) -> Self {
790 Self {
791 role: MessageRole::User,
792 content: content.into(),
793 }
794 }
795
796 /// Creates a new message with the `Assistant` role.
797 ///
798 /// # Arguments
799 ///
800 /// * `content` - The content of the assistant message.
801 pub fn assistant(content: impl Into<String>) -> Self {
802 Self {
803 role: MessageRole::Assistant,
804 content: content.into(),
805 }
806 }
807}
808
809// Legacy compatibility types - keep existing types for backward compatibility
810use uuid::Uuid;
811
/// Represents a user account (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct User {
    /// The unique ID of the user.
    pub id: Uuid,
    /// The user's identifier string (external/app-level id, distinct from `id`).
    pub user_id: String,
    /// The name of the user's subscription plan.
    pub plan_name: String,
    /// The user's current credit balance.
    pub current_credits: f64,
    /// The amount of credits the user has used in the current month.
    pub credits_used_this_month: f64,
    /// The date when the user's credits will reset.
    pub credits_reset_date: DateTime<Utc>,
    /// Indicates if the user account is active.
    pub is_active: bool,
    /// The timestamp of when the user account was created.
    pub created_at: DateTime<Utc>,
}
832
/// Represents an API key (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKey {
    /// The unique ID of the API key.
    pub id: Uuid,
    /// The API key string.
    pub key: String,
    /// The ID of the user who owns the key.
    pub owner_id: Uuid,
    /// Indicates if the API key is active.
    pub is_active: bool,
    /// The timestamp of when the key was created.
    pub created_at: DateTime<Utc>,
    /// The expiration date of the key; `None` means the key never expires.
    pub expires_at: Option<DateTime<Utc>>,
    /// A description of the key.
    pub description: Option<String>,
    /// The timestamp of when the key was last used; `None` if never used.
    pub last_used_at: Option<DateTime<Utc>>,
}
853
/// Represents usage statistics over a period (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageStats {
    /// The number of days in the usage period.
    pub period_days: u32,
    /// A list of daily usage data, one entry per day in the period.
    pub daily_usage: Vec<DailyUsage>,
    /// A list of recent credit transactions.
    pub recent_transactions: Vec<CreditTransaction>,
    /// The total number of requests made in the period.
    pub total_requests: u64,
    /// The total number of tokens used in the period.
    pub total_tokens: u64,
}
868
/// Represents usage data for a single day (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DailyUsage {
    /// The date for the usage data (string form; format is server-defined).
    pub date: String,
    /// The number of credits used on this day.
    pub credits_used: f64,
    /// The number of requests made on this day.
    pub requests: u64,
    /// The number of tokens used on this day.
    pub tokens: u64,
}
881
/// Represents a single credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditTransaction {
    /// The unique ID of the transaction.
    pub id: Uuid,
    /// The type of the transaction (usage, reset, purchase, or refund).
    pub transaction_type: TransactionType,
    /// The amount of credits involved in the transaction.
    pub credits_amount: f64,
    /// The credit balance after the transaction.
    pub credits_balance_after: f64,
    /// The provider associated with the transaction, if any.
    pub provider: Option<String>,
    /// The model associated with the transaction, if any.
    pub model: Option<String>,
    /// A description of the transaction.
    pub description: String,
    /// The timestamp of when the transaction occurred.
    pub created_at: DateTime<Utc>,
}
902
903/// The type of credit transaction (legacy).
904#[derive(Debug, Clone, Serialize, Deserialize)]
905#[serde(rename_all = "lowercase")]
906pub enum TransactionType {
907 /// A transaction for API usage.
908 Usage,
909 /// A transaction for a credit reset.
910 Reset,
911 /// A transaction for a credit purchase.
912 Purchase,
913 /// A transaction for a credit refund.
914 Refund,
915}
916
// Legacy aliases for backward compatibility.
// These are plain type aliases, so they are fully interchangeable with the
// types they point at; new code should use the right-hand names directly.
/// A legacy type alias for `MessageRole`.
pub type ChatRole = MessageRole;
/// A legacy type alias for `Usage`.
pub type ChatUsage = Usage;
/// A legacy type alias for `HealthStatus`.
pub type HealthCheck = HealthStatus;
924
/// Represents the status of backend services (legacy).
///
/// Unlike `ServiceStatus`, `redis` here is required rather than optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthServices {
    /// The status of the database connection.
    pub database: bool,
    /// The status of the Redis connection.
    pub redis: bool,
    /// The overall status of AI providers.
    pub providers: bool,
}
935
/// The health status of the API (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatusEnum {
    /// The API is healthy.
    Healthy,
    /// The API is in a degraded state.
    Degraded,
    /// The API is unhealthy.
    Unhealthy,
    /// The API needs initialization.
    // NOTE(review): `rename_all = "lowercase"` serializes this variant as
    // "needsinit" (not "needs_init"); confirm that matches what the API sends,
    // otherwise deserialization of this state will fail.
    NeedsInit,
}
949
/// Represents the format that the model must output.
///
/// Externally tagged with snake_case names: `Text` serializes as the string
/// `"text"`, `JsonObject` as `"json_object"`, and `JsonSchema` as the object
/// `{"json_schema": {"json_schema": <schema>}}`.
// NOTE(review): OpenAI's response_format uses `{"type": "..."}` objects; if
// this type is sent to that API verbatim, an internally tagged representation
// (`#[serde(tag = "type")]`) may be required — confirm against the backend.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResponseFormat {
    /// The model can return text.
    Text,
    /// The model must return a valid JSON object.
    JsonObject,
    /// The model must return a JSON object that matches the provided schema.
    JsonSchema {
        /// The JSON Schema that the model's output must conform to.
        json_schema: serde_json::Value,
    },
}
964
/// Represents a tool that the model can use.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
    /// The type of the tool (currently only "function" is supported).
    // `r#type` because `type` is a Rust keyword; serializes as "type".
    pub r#type: ToolType,
    /// The function definition describing the tool's capabilities.
    pub function: FunctionDefinition,
}
973
/// The type of tool.
///
/// Serializes as the string "function" (the only supported value).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolType {
    /// A function tool.
    Function,
}
981
/// Represents a function definition for a tool.
///
/// Optional fields are omitted from the serialized JSON when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDefinition {
    /// The name of the function.
    pub name: String,
    /// A description of what the function does.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// The parameters the function accepts, described as a JSON Schema object.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parameters: Option<serde_json::Value>,
}
994
995/// Controls which tool is called by the model.
996#[derive(Debug, Clone, Serialize, Deserialize)]
997#[serde(untagged)]
998pub enum ToolChoice {
999 /// No tool is called.
1000 None,
1001 /// The model chooses which tool to call.
1002 Auto,
1003 /// A specific tool is called.
1004 Tool {
1005 /// The type of the tool being called.
1006 r#type: ToolType,
1007 /// The function to call within the tool.
1008 function: ToolFunction,
1009 },
1010}
1011
/// Represents a tool function call.
///
/// Used inside [`ToolChoice::Tool`] to name the specific function to invoke.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolFunction {
    /// The name of the function to call.
    pub name: String,
}
1018
/// Configuration for thinking capabilities in Gemini 3 and 2.5 series models.
///
/// `thinking_level` applies to Gemini 3 models and `thinking_budget` to
/// Gemini 2.5 models; fields left as `None` are omitted from serialization.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ThinkingConfig {
    /// Whether to include thought summaries in the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level for Gemini 3 models (low, high for Pro; minimal, low, medium, high for Flash).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,

    /// The thinking budget for Gemini 2.5 models (number of thinking tokens).
    /// Per the constructors below, -1 requests a dynamic budget and 0 disables thinking.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,
}
1034
/// Thinking levels for Gemini 3 models.
///
/// Serialized with lowercase variant names ("minimal", "low", "medium", "high").
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ThinkingLevel {
    /// Minimal thinking (Gemini 3 Flash only) - model likely won't think.
    Minimal,
    /// Low thinking level - faster responses with basic reasoning.
    Low,
    /// Medium thinking level (Gemini 3 Flash only) - balanced reasoning and speed.
    Medium,
    /// High thinking level - deep reasoning for complex tasks (default).
    High,
}
1048
/// Represents a content part that may include thought signatures.
///
/// Exactly one of `text`, `function_call`, or `function_response` is normally
/// populated (see the constructors in the `impl` block); unset fields are
/// omitted from serialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ContentPart {
    /// The text content of the part.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,

    /// Function call information if this part contains a function call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_call: Option<FunctionCall>,

    /// Function response information if this part contains a function response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_response: Option<FunctionResponse>,

    /// Indicates if this part contains thought content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<bool>,

    /// Encrypted thought signature for preserving reasoning context across turns.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought_signature: Option<String>,
}
1072
/// Represents a function call in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionCall {
    /// The name of the function being called.
    pub name: String,
    /// The arguments for the function call as a JSON object.
    pub args: serde_json::Value,
}
1081
/// Represents a function response in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionResponse {
    /// The name of the function that was called.
    pub name: String,
    /// The response from the function call.
    pub response: serde_json::Value,
}
1090
/// Enhanced chat message that supports Gemini 3 thinking capabilities.
///
/// Unlike [`ChatMessage`], content is a list of [`ContentPart`]s rather than a
/// single string, allowing function calls and thought signatures to coexist.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnhancedChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content parts of the message (supports text, function calls, and thought signatures).
    pub parts: Vec<ContentPart>,
}
1099
/// Enhanced usage statistics that include thinking tokens.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedUsage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,
    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
    /// The number of thinking tokens used (Gemini 3 and 2.5 series).
    /// Absent (and omitted from serialization) for models without thinking.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<u32>,
}
1113
1114impl ThinkingConfig {
1115 /// Creates a new thinking configuration with default values.
1116 pub fn new() -> Self {
1117 Self::default()
1118 }
1119
1120 /// Creates a configuration for Gemini 3 models with specified thinking level.
1121 ///
1122 /// # Arguments
1123 ///
1124 /// * `level` - The thinking level to use.
1125 /// * `include_thoughts` - Whether to include thought summaries.
1126 pub fn gemini_3(level: ThinkingLevel, include_thoughts: bool) -> Self {
1127 Self {
1128 thinking_level: Some(level),
1129 include_thoughts: Some(include_thoughts),
1130 thinking_budget: None,
1131 }
1132 }
1133
1134 /// Creates a configuration for Gemini 2.5 models with specified thinking budget.
1135 ///
1136 /// # Arguments
1137 ///
1138 /// * `budget` - The thinking budget (-1 for dynamic, 0 to disable, or specific token count).
1139 /// * `include_thoughts` - Whether to include thought summaries.
1140 pub fn gemini_2_5(budget: i32, include_thoughts: bool) -> Self {
1141 Self {
1142 thinking_budget: Some(budget),
1143 include_thoughts: Some(include_thoughts),
1144 thinking_level: None,
1145 }
1146 }
1147
1148 /// Creates a configuration optimized for complex reasoning tasks.
1149 pub fn high_reasoning() -> Self {
1150 Self {
1151 thinking_level: Some(ThinkingLevel::High),
1152 include_thoughts: Some(true),
1153 thinking_budget: Some(-1), // Dynamic for 2.5 models
1154 }
1155 }
1156
1157 /// Creates a configuration optimized for fast responses.
1158 pub fn fast_response() -> Self {
1159 Self {
1160 thinking_level: Some(ThinkingLevel::Low),
1161 include_thoughts: Some(false),
1162 thinking_budget: Some(512), // Low budget for 2.5 models
1163 }
1164 }
1165}
1166
1167impl ContentPart {
1168 /// Creates a new text content part.
1169 pub fn text(content: impl Into<String>) -> Self {
1170 Self {
1171 text: Some(content.into()),
1172 function_call: None,
1173 function_response: None,
1174 thought: None,
1175 thought_signature: None,
1176 }
1177 }
1178
1179 /// Creates a new function call content part.
1180 pub fn function_call(name: impl Into<String>, args: serde_json::Value) -> Self {
1181 Self {
1182 text: None,
1183 function_call: Some(FunctionCall {
1184 name: name.into(),
1185 args,
1186 }),
1187 function_response: None,
1188 thought: None,
1189 thought_signature: None,
1190 }
1191 }
1192
1193 /// Creates a new function response content part.
1194 pub fn function_response(name: impl Into<String>, response: serde_json::Value) -> Self {
1195 Self {
1196 text: None,
1197 function_call: None,
1198 function_response: Some(FunctionResponse {
1199 name: name.into(),
1200 response,
1201 }),
1202 thought: None,
1203 thought_signature: None,
1204 }
1205 }
1206
1207 /// Adds a thought signature to this content part.
1208 pub fn with_thought_signature(mut self, signature: impl Into<String>) -> Self {
1209 self.thought_signature = Some(signature.into());
1210 self
1211 }
1212
1213 /// Marks this content part as containing thought content.
1214 pub fn as_thought(mut self) -> Self {
1215 self.thought = Some(true);
1216 self
1217 }
1218}
1219
1220impl EnhancedChatMessage {
1221 /// Creates a new enhanced message with the `System` role.
1222 pub fn system(content: impl Into<String>) -> Self {
1223 Self {
1224 role: MessageRole::System,
1225 parts: vec![ContentPart::text(content)],
1226 }
1227 }
1228
1229 /// Creates a new enhanced message with the `User` role.
1230 pub fn user(content: impl Into<String>) -> Self {
1231 Self {
1232 role: MessageRole::User,
1233 parts: vec![ContentPart::text(content)],
1234 }
1235 }
1236
1237 /// Creates a new enhanced message with the `Assistant` role.
1238 pub fn assistant(content: impl Into<String>) -> Self {
1239 Self {
1240 role: MessageRole::Assistant,
1241 parts: vec![ContentPart::text(content)],
1242 }
1243 }
1244
1245 /// Creates a new enhanced message with multiple content parts.
1246 pub fn with_parts(role: MessageRole, parts: Vec<ContentPart>) -> Self {
1247 Self { role, parts }
1248 }
1249}
1250
/// Represents a streaming chat completion response (OpenAI delta format).
///
/// One value of this type is decoded per SSE chunk of a streamed completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamResponse {
    /// A unique identifier for the chat completion.
    /// The same `id` is shared by every chunk of one stream.
    pub id: String,
    /// The type of object, which is always "chat.completion.chunk".
    pub object: String,
    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,
    /// The model that was used for the completion.
    pub model: String,
    /// A list of chat completion choices.
    pub choices: Vec<ChatCompletionStreamChoice>,
    /// Information about the token usage for this completion (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}
1268
/// Represents a single choice in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
    /// The delta containing the new content for this choice.
    pub delta: ChatCompletionStreamDelta,
    /// The reason the model stopped generating tokens (only present in the final chunk).
    // NOTE(review): left as a free-form string rather than an enum; presumably
    // values like "stop" / "length" / "tool_calls" — confirm with the API.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}
1280
/// Represents the delta (change) in a streaming chat completion response.
///
/// All fields are optional; each chunk carries only the pieces that changed,
/// and unset fields are omitted from serialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamDelta {
    /// The role of the message (only present in the first chunk).
    pub role: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// The new content for this chunk.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// The thinking/reasoning content for this chunk (if any).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<String>,
    /// Tool calls for this chunk (if any).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
}
1297
/// Represents a tool call in a streaming response.
///
/// In the delta format a tool call arrives across several chunks; `index`
/// identifies which call a fragment belongs to, and the remaining fields are
/// optional because each chunk carries only part of the call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// The index of the tool call.
    pub index: u32,
    /// The ID of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// The type of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#type: Option<String>,
    /// The function being called.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function: Option<ToolCallFunction>,
}
1313
/// Represents a function call in a tool call.
///
/// `arguments` is a raw JSON string (not a parsed object); in streaming
/// responses it may arrive as fragments spread over several chunks that the
/// caller must concatenate before parsing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallFunction {
    /// The name of the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// The arguments for the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arguments: Option<String>,
}