rainy_sdk/models.rs
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;

/// Represents a single message in a chat conversation.
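///
/// # Examples
///
/// A minimal sketch using the role helpers (the import path assumes these
/// types are re-exported at `rainy_sdk::models`):
///
/// ```ignore
/// use rainy_sdk::models::ChatMessage;
///
/// let history = vec![
///     ChatMessage::system("You are a helpful assistant."),
///     ChatMessage::user("What is the capital of France?"),
/// ];
/// assert_eq!(history[1].content, "What is the capital of France?");
/// ```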
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content of the message.
    pub content: String,
}

/// The role of a message's author.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum MessageRole {
    /// A message from the system, setting the context or instructions for the assistant.
    System,
    /// A message from the user.
    User,
    /// A message from the assistant.
    Assistant,
}

/// Represents a request to create a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionRequest {
    /// The identifier of the model to use for the completion (e.g., "gpt-4o", "claude-sonnet-4").
    pub model: String,

    /// A list of messages that form the conversation history.
    pub messages: Vec<ChatMessage>,

    /// The sampling temperature to use, between 0.0 and 2.0. Higher values make the output
    /// more random; lower values make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The maximum number of tokens to generate in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,

    /// The nucleus sampling parameter: the model considers only the tokens comprising the
    /// top `top_p` probability mass. For example, 0.1 restricts sampling to the tokens in
    /// the top 10% probability mass.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// A penalty applied to new tokens based on their frequency in the text so far.
    /// It decreases the model's likelihood to repeat the same line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// A penalty applied to new tokens based on whether they appear in the text so far.
    /// It increases the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// A list of sequences that will cause the model to stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// A unique identifier representing your end-user, which can help in monitoring and
    /// tracking conversations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,

    /// A hint to the router about which provider to use for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,

    /// If set to `true`, the response will be streamed as a series of events.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// Modifies the likelihood of specified tokens appearing in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<serde_json::Value>,

    /// Whether to return log probabilities of the output tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<bool>,

    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<u32>,

    /// How many chat completion choices to generate for each input message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub n: Option<u32>,

    /// An object specifying the format that the model must output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,

    /// A list of tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,

    /// Controls which (if any) tool is called by the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,

    /// Configuration for thinking capabilities (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
}

/// Represents the response from a chat completion request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,

    /// The type of object, which is always "chat.completion".
    pub object: String,

    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,

    /// The model that was used for the completion.
    pub model: String,

    /// A list of chat completion choices.
    pub choices: Vec<ChatChoice>,

    /// Information about the token usage for this completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,

    /// The message generated by the model.
    pub message: ChatMessage,

    /// The reason the model stopped generating tokens.
    pub finish_reason: String,
}

/// Represents the token usage statistics for a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,

    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,

    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
}

/// Represents the health status of the Rainy API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthStatus {
    /// The overall status of the API (e.g., "healthy", "degraded").
    pub status: String,

    /// The timestamp of when the health check was performed.
    pub timestamp: String,

    /// The uptime of the system in seconds.
    pub uptime: f64,

    /// The status of individual services.
    pub services: ServiceStatus,
}

/// Represents the status of individual backend services.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceStatus {
    /// The status of the database connection.
    pub database: bool,

    /// The status of the Redis connection, if applicable.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub redis: Option<bool>,

    /// The overall status of the connections to AI providers.
    pub providers: bool,
}

/// Represents the available models and providers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AvailableModels {
    /// A map where keys are provider names and values are lists of model names.
    pub providers: HashMap<String, Vec<String>>,

    /// The total number of available models across all providers.
    pub total_models: usize,

    /// A list of provider names that are currently active and available.
    pub active_providers: Vec<String>,
}

/// Represents information about credit usage for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditInfo {
    /// The number of credits available before the request.
    pub current_credits: f64,

    /// The estimated number of credits that the request will cost.
    pub estimated_cost: f64,

    /// The estimated number of credits remaining after the request.
    pub credits_after_request: f64,

    /// The date when the credit balance is next scheduled to be reset.
    pub reset_date: String,
}

/// Represents metadata extracted from the response headers of an API request.
#[derive(Debug, Clone)]
pub struct RequestMetadata {
    /// The time taken for the request to complete, in milliseconds.
    pub response_time: Option<u64>,

    /// The AI provider that handled the request.
    pub provider: Option<String>,

    /// The number of tokens used in the request.
    pub tokens_used: Option<u32>,

    /// The number of credits used for the request.
    pub credits_used: Option<f64>,

    /// The number of credits remaining after the request.
    pub credits_remaining: Option<f64>,

    /// The unique ID of the request, for tracking and debugging.
    pub request_id: Option<String>,
}

/// A collection of predefined model constants for convenience.
/// All models listed here are OpenAI-compatible and require no parameter adaptations.
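///
/// # Examples
///
/// A sketch of selecting a model by constant (the import path assumes these
/// items are re-exported at `rainy_sdk::models`):
///
/// ```ignore
/// use rainy_sdk::models::{model_constants, ChatCompletionRequest, ChatMessage};
///
/// let request = ChatCompletionRequest::new(
///     model_constants::OPENAI_GPT_4O,
///     vec![ChatMessage::user("Hi!")],
/// );
/// ```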
pub mod model_constants {
    // OpenAI models (fully compatible)
    /// Constant for the GPT-4o model.
    pub const OPENAI_GPT_4O: &str = "gpt-4o";
    /// Constant for the GPT-5 model.
    pub const OPENAI_GPT_5: &str = "gpt-5";
    /// Constant for the GPT-5 Pro model.
    pub const OPENAI_GPT_5_PRO: &str = "gpt-5-pro";
    /// Constant for the O3 model.
    pub const OPENAI_O3: &str = "o3";
    /// Constant for the O4 Mini model.
    pub const OPENAI_O4_MINI: &str = "o4-mini";

    // Google Gemini models (fully compatible via official compatibility layer)
    /// Constant for the Gemini 2.5 Pro model.
    pub const GOOGLE_GEMINI_2_5_PRO: &str = "gemini-2.5-pro";
    /// Constant for the Gemini 2.5 Flash model.
    pub const GOOGLE_GEMINI_2_5_FLASH: &str = "gemini-2.5-flash";
    /// Constant for the Gemini 2.5 Flash Lite model.
    pub const GOOGLE_GEMINI_2_5_FLASH_LITE: &str = "gemini-2.5-flash-lite";

    // Gemini 3 series - advanced reasoning models with thinking capabilities
    /// Constant for the Gemini 3 Pro model with advanced reasoning.
    pub const GOOGLE_GEMINI_3_PRO: &str = "gemini-3-pro-preview";
    /// Constant for the Gemini 3 Flash model with thinking capabilities.
    pub const GOOGLE_GEMINI_3_FLASH: &str = "gemini-3-flash-preview";
    /// Constant for the Gemini 3 Pro Image model with multimodal reasoning.
    pub const GOOGLE_GEMINI_3_PRO_IMAGE: &str = "gemini-3-pro-image-preview";

    // Groq models (fully compatible)
    /// Constant for the Llama 3.1 8B Instant model.
    pub const GROQ_LLAMA_3_1_8B_INSTANT: &str = "llama-3.1-8b-instant";
    /// Constant for the Llama 3.3 70B Versatile model.
    pub const GROQ_LLAMA_3_3_70B_VERSATILE: &str = "llama-3.3-70b-versatile";
    /// Constant for the Moonshot Kimi K2 Instruct (0905) model.
    pub const KIMI_K2_0925: &str = "moonshotai/kimi-k2-instruct-0905";

    // Cerebras models (fully compatible)
    /// Constant for the Llama 3.1 8B model.
    pub const CEREBRAS_LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";

    // Enosis Labs models (fully compatible)
    /// Constant for the Astronomer 1 model.
    pub const ASTRONOMER_1: &str = "astronomer-1";
    /// Constant for the Astronomer 1 Max model.
    pub const ASTRONOMER_1_MAX: &str = "astronomer-1-max";
    /// Constant for the Astronomer 1.5 model.
    pub const ASTRONOMER_1_5: &str = "astronomer-1.5";
    /// Constant for the Astronomer 2 model.
    pub const ASTRONOMER_2: &str = "astronomer-2";
    /// Constant for the Astronomer 2 Pro model.
    pub const ASTRONOMER_2_PRO: &str = "astronomer-2-pro";

    // Legacy aliases for backward compatibility (deprecated - use the provider-prefixed versions above)
    /// Legacy constant for the GPT-4o model (use `OPENAI_GPT_4O` instead).
    #[deprecated(note = "Use OPENAI_GPT_4O instead for OpenAI compatibility")]
    pub const GPT_4O: &str = "openai/gpt-4o";
    /// Legacy constant for the GPT-5 model (use `OPENAI_GPT_5` instead).
    #[deprecated(note = "Use OPENAI_GPT_5 instead for OpenAI compatibility")]
    pub const GPT_5: &str = "openai/gpt-5";
    /// Legacy constant for the Gemini 2.5 Pro model (use `GOOGLE_GEMINI_2_5_PRO` instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_PRO instead for OpenAI compatibility")]
    pub const GEMINI_2_5_PRO: &str = "google/gemini-2.5-pro";
    /// Legacy constant for the Gemini 2.5 Flash model (use `GOOGLE_GEMINI_2_5_FLASH` instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH: &str = "google/gemini-2.5-flash";
    /// Legacy constant for the Gemini 2.5 Flash Lite model (use `GOOGLE_GEMINI_2_5_FLASH_LITE` instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH_LITE instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH_LITE: &str = "google/gemini-2.5-flash-lite";
    /// Legacy constant for the Llama 3.1 8B Instant model (use `GROQ_LLAMA_3_1_8B_INSTANT` instead).
    #[deprecated(note = "Use GROQ_LLAMA_3_1_8B_INSTANT instead for OpenAI compatibility")]
    pub const LLAMA_3_1_8B_INSTANT: &str = "groq/llama-3.1-8b-instant";
    /// Legacy constant for the Llama 3.1 8B model (use `CEREBRAS_LLAMA3_1_8B` instead).
    #[deprecated(note = "Use CEREBRAS_LLAMA3_1_8B instead for OpenAI compatibility")]
    pub const LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";
}

/// A collection of predefined provider name constants for convenience.
pub mod providers {
    /// Constant for the OpenAI provider.
    pub const OPENAI: &str = "openai";
    /// Constant for the Anthropic provider.
    pub const ANTHROPIC: &str = "anthropic";
    /// Constant for the Groq provider.
    pub const GROQ: &str = "groq";
    /// Constant for the Cerebras provider.
    pub const CEREBRAS: &str = "cerebras";
    /// Constant for the Gemini provider.
    pub const GEMINI: &str = "gemini";
    /// Constant for the Enosis Labs provider.
    pub const ENOSISLABS: &str = "enosislabs";
}

impl ChatCompletionRequest {
    /// Creates a new `ChatCompletionRequest` with the given model and messages.
    ///
    /// # Arguments
    ///
    /// * `model` - The identifier of the model to use.
    /// * `messages` - The list of messages for the conversation.
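    ///
    /// # Examples
    ///
    /// A minimal sketch of the builder flow (the model id is illustrative and
    /// the import path assumes these types are re-exported at `rainy_sdk::models`):
    ///
    /// ```ignore
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new(
    ///     "gpt-4o",
    ///     vec![ChatMessage::user("Summarize Rust's ownership model.")],
    /// )
    /// .with_temperature(0.3)
    /// .with_max_tokens(512)
    /// .with_stream(false);
    /// ```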
    pub fn new(model: impl Into<String>, messages: Vec<ChatMessage>) -> Self {
        Self {
            model: model.into(),
            messages,
            temperature: None,
            max_tokens: None,
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
            stop: None,
            user: None,
            provider: None,
            stream: None,
            logit_bias: None,
            logprobs: None,
            top_logprobs: None,
            n: None,
            response_format: None,
            tools: None,
            tool_choice: None,
            thinking_config: None,
        }
    }

    /// Sets the temperature for the chat completion.
    ///
    /// The temperature is clamped between 0.0 and 2.0.
    ///
    /// # Arguments
    ///
    /// * `temperature` - The sampling temperature.
    pub fn with_temperature(mut self, temperature: f32) -> Self {
        self.temperature = Some(temperature.clamp(0.0, 2.0));
        self
    }

    /// Sets the maximum number of tokens to generate.
    ///
    /// # Arguments
    ///
    /// * `max_tokens` - The maximum number of tokens.
    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
        self.max_tokens = Some(max_tokens);
        self
    }

    /// Sets the user identifier for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `user` - A unique identifier for the end-user.
    pub fn with_user(mut self, user: impl Into<String>) -> Self {
        self.user = Some(user.into());
        self
    }

    /// Sets a provider hint for the request.
    ///
    /// # Arguments
    ///
    /// * `provider` - The name of the provider to use.
    pub fn with_provider(mut self, provider: impl Into<String>) -> Self {
        self.provider = Some(provider.into());
        self
    }

    /// Enables or disables streaming for the response.
    ///
    /// # Arguments
    ///
    /// * `stream` - `true` to enable streaming, `false` to disable.
    pub fn with_stream(mut self, stream: bool) -> Self {
        self.stream = Some(stream);
        self
    }

    /// Sets the logit bias for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `logit_bias` - A map of token IDs to bias values.
    pub fn with_logit_bias(mut self, logit_bias: serde_json::Value) -> Self {
        self.logit_bias = Some(logit_bias);
        self
    }

    /// Enables or disables log probabilities for the response.
    ///
    /// # Arguments
    ///
    /// * `logprobs` - `true` to include log probabilities.
    pub fn with_logprobs(mut self, logprobs: bool) -> Self {
        self.logprobs = Some(logprobs);
        self
    }

    /// Sets the number of most likely tokens to return at each position.
    ///
    /// # Arguments
    ///
    /// * `top_logprobs` - The number of top log probabilities to return.
    pub fn with_top_logprobs(mut self, top_logprobs: u32) -> Self {
        self.top_logprobs = Some(top_logprobs);
        self
    }

    /// Sets the number of chat completion choices to generate.
    ///
    /// # Arguments
    ///
    /// * `n` - The number of completions to generate.
    pub fn with_n(mut self, n: u32) -> Self {
        self.n = Some(n);
        self
    }

    /// Sets the response format for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `response_format` - The format the model must output.
    pub fn with_response_format(mut self, response_format: ResponseFormat) -> Self {
        self.response_format = Some(response_format);
        self
    }

    /// Sets the tools available to the model.
    ///
    /// # Arguments
    ///
    /// * `tools` - A list of tools the model can use.
    pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
        self.tools = Some(tools);
        self
    }

    /// Sets the tool choice for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `tool_choice` - Controls which tool the model uses.
    pub fn with_tool_choice(mut self, tool_choice: ToolChoice) -> Self {
        self.tool_choice = Some(tool_choice);
        self
    }

    /// Sets the thinking configuration for Gemini 3 and 2.5 series models.
    ///
    /// # Arguments
    ///
    /// * `thinking_config` - Configuration for thinking capabilities.
    pub fn with_thinking_config(mut self, thinking_config: ThinkingConfig) -> Self {
        self.thinking_config = Some(thinking_config);
        self
    }

    /// Enables thought summaries in the response (Gemini 3 and 2.5 series).
    ///
    /// # Arguments
    ///
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn with_include_thoughts(mut self, include_thoughts: bool) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.include_thoughts = Some(include_thoughts);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking level for Gemini 3 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_level` - The thinking level (minimal, low, medium, high).
    pub fn with_thinking_level(mut self, thinking_level: ThinkingLevel) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_level = Some(thinking_level);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking budget for Gemini 2.5 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_budget` - Number of thinking tokens (-1 for dynamic, 0 to disable).
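    ///
    /// # Examples
    ///
    /// A sketch of enabling a dynamic thinking budget on a Gemini 2.5 model
    /// (assumes a `rainy_sdk::models` re-export):
    ///
    /// ```ignore
    /// use rainy_sdk::models::{model_constants, ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new(
    ///     model_constants::GOOGLE_GEMINI_2_5_FLASH,
    ///     vec![ChatMessage::user("Plan a 3-day trip to Kyoto.")],
    /// )
    /// .with_thinking_budget(-1) // -1 = dynamic budget
    /// .with_include_thoughts(true);
    /// ```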
    pub fn with_thinking_budget(mut self, thinking_budget: i32) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_budget = Some(thinking_budget);
        self.thinking_config = Some(config);
        self
    }

    /// Validates that the request parameters are compatible with OpenAI standards.
    ///
    /// This method checks parameter ranges and values against OpenAI's API specification,
    /// and also validates Gemini-specific parameters such as the thinking configuration.
    ///
    /// # Returns
    ///
    /// A `Result` indicating whether the request is valid for OpenAI compatibility.
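    ///
    /// # Examples
    ///
    /// A sketch of catching an out-of-range parameter before sending the request
    /// (assumes a `rainy_sdk::models` re-export):
    ///
    /// ```ignore
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("Hi")])
    ///     .with_top_logprobs(50); // outside the allowed 0..=20 range
    /// assert!(request.validate_openai_compatibility().is_err());
    /// ```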
    pub fn validate_openai_compatibility(&self) -> Result<(), String> {
        // Validate temperature
        if let Some(temp) = self.temperature {
            if !(0.0..=2.0).contains(&temp) {
                return Err(format!(
                    "Temperature must be between 0.0 and 2.0, got {}",
                    temp
                ));
            }
        }

        // Validate top_p
        if let Some(top_p) = self.top_p {
            if !(0.0..=1.0).contains(&top_p) {
                return Err(format!("Top-p must be between 0.0 and 1.0, got {}", top_p));
            }
        }

        // Validate frequency_penalty
        if let Some(fp) = self.frequency_penalty {
            if !(-2.0..=2.0).contains(&fp) {
                return Err(format!(
                    "Frequency penalty must be between -2.0 and 2.0, got {}",
                    fp
                ));
            }
        }

        // Validate presence_penalty
        if let Some(pp) = self.presence_penalty {
            if !(-2.0..=2.0).contains(&pp) {
                return Err(format!(
                    "Presence penalty must be between -2.0 and 2.0, got {}",
                    pp
                ));
            }
        }

        // Validate max_tokens
        if let Some(mt) = self.max_tokens {
            if mt == 0 {
                return Err("Max tokens must be greater than 0".to_string());
            }
        }

        // Validate top_logprobs
        if let Some(tlp) = self.top_logprobs {
            if !(0..=20).contains(&tlp) {
                return Err(format!(
                    "Top logprobs must be between 0 and 20, got {}",
                    tlp
                ));
            }
        }

        // Validate n
        if let Some(n) = self.n {
            if n == 0 {
                return Err("n must be greater than 0".to_string());
            }
        }

        // Validate stop sequences
        if let Some(stop) = &self.stop {
            if stop.len() > 4 {
                return Err("Cannot have more than 4 stop sequences".to_string());
            }
            for seq in stop {
                if seq.is_empty() {
                    return Err("Stop sequences cannot be empty".to_string());
                }
                if seq.len() > 64 {
                    return Err("Stop sequences cannot be longer than 64 characters".to_string());
                }
            }
        }

        // Validate thinking configuration for Gemini models
        if let Some(thinking_config) = &self.thinking_config {
            self.validate_thinking_config(thinking_config)?;
        }

        Ok(())
    }

    /// Validates thinking configuration parameters for Gemini models.
    fn validate_thinking_config(&self, config: &ThinkingConfig) -> Result<(), String> {
        let is_gemini_3 = self.model.contains("gemini-3");
        let is_gemini_2_5 = self.model.contains("gemini-2.5");
        let is_gemini_3_pro = self.model.contains("gemini-3-pro");

        // Validate thinking level (Gemini 3 only)
        if let Some(level) = &config.thinking_level {
            if !is_gemini_3 {
                return Err("thinking_level is only supported for Gemini 3 models".to_string());
            }

            match level {
                ThinkingLevel::Minimal | ThinkingLevel::Medium => {
                    if is_gemini_3_pro {
                        return Err(
                            "Gemini 3 Pro only supports 'low' and 'high' thinking levels"
                                .to_string(),
                        );
                    }
                }
                _ => {}
            }
        }

        // Validate thinking budget (Gemini 2.5 only)
        if let Some(budget) = config.thinking_budget {
            if !is_gemini_2_5 {
                return Err("thinking_budget is only supported for Gemini 2.5 models".to_string());
            }

            // Validate budget ranges based on the model
            if self.model.contains("2.5-pro") {
                if budget != -1 && !(128..=32768).contains(&budget) {
                    return Err(
                        "Gemini 2.5 Pro thinking budget must be -1 (dynamic) or between 128-32768"
                            .to_string(),
                    );
                }
            } else if self.model.contains("2.5-flash")
                && budget != -1
                && !(0..=24576).contains(&budget)
            {
                return Err(
                    "Gemini 2.5 Flash thinking budget must be -1 (dynamic) or between 0-24576"
                        .to_string(),
                );
            }
        }

        // Reject conflicting parameters
        if config.thinking_level.is_some() && config.thinking_budget.is_some() {
            return Err("Cannot specify both thinking_level (Gemini 3) and thinking_budget (Gemini 2.5) in the same request".to_string());
        }

        Ok(())
    }

    /// Checks if the model supports thinking capabilities.
    pub fn supports_thinking(&self) -> bool {
        self.model.contains("gemini-3") || self.model.contains("gemini-2.5")
    }

    /// Checks if the model requires thought signatures for function calling.
    pub fn requires_thought_signatures(&self) -> bool {
        self.model.contains("gemini-3")
    }
}

impl ChatMessage {
    /// Creates a new message with the `System` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the system message.
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            content: content.into(),
        }
    }

    /// Creates a new message with the `User` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the user message.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            content: content.into(),
        }
    }

    /// Creates a new message with the `Assistant` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the assistant message.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            content: content.into(),
        }
    }
}

// Legacy compatibility types - keep existing types for backward compatibility

/// Represents a user account (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct User {
    /// The unique ID of the user.
    pub id: Uuid,
    /// The user's identifier string.
    pub user_id: String,
    /// The name of the user's subscription plan.
    pub plan_name: String,
    /// The user's current credit balance.
    pub current_credits: f64,
    /// The amount of credits the user has used in the current month.
    pub credits_used_this_month: f64,
    /// The date when the user's credits will reset.
    pub credits_reset_date: DateTime<Utc>,
    /// Indicates if the user account is active.
    pub is_active: bool,
    /// The timestamp of when the user account was created.
    pub created_at: DateTime<Utc>,
}

/// Represents an API key (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKey {
    /// The unique ID of the API key.
    pub id: Uuid,
    /// The API key string.
    pub key: String,
    /// The ID of the user who owns the key.
    pub owner_id: Uuid,
    /// Indicates if the API key is active.
    pub is_active: bool,
    /// The timestamp of when the key was created.
    pub created_at: DateTime<Utc>,
    /// The expiration date of the key, if any.
    pub expires_at: Option<DateTime<Utc>>,
    /// A description of the key.
    pub description: Option<String>,
    /// The timestamp of when the key was last used.
    pub last_used_at: Option<DateTime<Utc>>,
}

/// Represents usage statistics over a period (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageStats {
    /// The number of days in the usage period.
    pub period_days: u32,
    /// A list of daily usage data.
    pub daily_usage: Vec<DailyUsage>,
    /// A list of recent credit transactions.
    pub recent_transactions: Vec<CreditTransaction>,
    /// The total number of requests made in the period.
    pub total_requests: u64,
    /// The total number of tokens used in the period.
    pub total_tokens: u64,
}

/// Represents usage data for a single day (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DailyUsage {
    /// The date for the usage data.
    pub date: String,
    /// The number of credits used on this day.
    pub credits_used: f64,
    /// The number of requests made on this day.
    pub requests: u64,
    /// The number of tokens used on this day.
    pub tokens: u64,
}

/// Represents a single credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditTransaction {
    /// The unique ID of the transaction.
    pub id: Uuid,
    /// The type of the transaction.
    pub transaction_type: TransactionType,
    /// The amount of credits involved in the transaction.
    pub credits_amount: f64,
    /// The credit balance after the transaction.
    pub credits_balance_after: f64,
    /// The provider associated with the transaction, if any.
    pub provider: Option<String>,
    /// The model associated with the transaction, if any.
    pub model: Option<String>,
    /// A description of the transaction.
    pub description: String,
    /// The timestamp of when the transaction occurred.
    pub created_at: DateTime<Utc>,
}

/// The type of credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TransactionType {
    /// A transaction for API usage.
    Usage,
    /// A transaction for a credit reset.
    Reset,
    /// A transaction for a credit purchase.
    Purchase,
    /// A transaction for a credit refund.
    Refund,
}

// Legacy aliases for backward compatibility
/// A legacy type alias for `MessageRole`.
pub type ChatRole = MessageRole;
/// A legacy type alias for `Usage`.
pub type ChatUsage = Usage;
/// A legacy type alias for `HealthStatus`.
pub type HealthCheck = HealthStatus;

/// Represents the status of backend services (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthServices {
    /// The status of the database connection.
    pub database: bool,
    /// The status of the Redis connection.
    pub redis: bool,
    /// The overall status of AI providers.
    pub providers: bool,
}

/// The health status of the API (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatusEnum {
    /// The API is healthy.
    Healthy,
    /// The API is in a degraded state.
    Degraded,
    /// The API is unhealthy.
    Unhealthy,
    /// The API needs initialization.
    NeedsInit,
}

/// Represents the format that the model must output.
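///
/// # Examples
///
/// A sketch of requesting JSON output; the serialized shape follows from the
/// serde attributes below (assumes a `rainy_sdk::models` re-export):
///
/// ```ignore
/// use rainy_sdk::models::ResponseFormat;
///
/// // With external tagging and `rename_all = "snake_case"`, the unit
/// // variant serializes as the bare string "json_object".
/// let format = ResponseFormat::JsonObject;
/// assert_eq!(serde_json::to_string(&format).unwrap(), "\"json_object\"");
/// ```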
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ResponseFormat {
    /// The model can return text.
    Text,
    /// The model must return a valid JSON object.
    JsonObject,
    /// The model must return a JSON object that matches the provided schema.
    JsonSchema { json_schema: serde_json::Value },
}

/// Represents a tool that the model can use.
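///
/// # Examples
///
/// A sketch of defining a function tool (the function name and schema are
/// illustrative; assumes a `rainy_sdk::models` re-export):
///
/// ```ignore
/// use rainy_sdk::models::{FunctionDefinition, Tool, ToolType};
///
/// let weather_tool = Tool {
///     r#type: ToolType::Function,
///     function: FunctionDefinition {
///         name: "get_weather".to_string(),
///         description: Some("Look up the current weather for a city.".to_string()),
///         parameters: Some(serde_json::json!({
///             "type": "object",
///             "properties": { "city": { "type": "string" } },
///             "required": ["city"]
///         })),
///     },
/// };
/// ```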
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
    /// The type of the tool.
    pub r#type: ToolType,
    /// The function definition for the tool.
    pub function: FunctionDefinition,
}

/// The type of tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolType {
    /// A function tool.
    Function,
}

/// Represents a function definition for a tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDefinition {
    /// The name of the function.
    pub name: String,
    /// A description of what the function does.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// The parameters the function accepts, described as a JSON Schema object.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parameters: Option<serde_json::Value>,
}

/// Controls which tool is called by the model.
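///
/// # Examples
///
/// A sketch of the string form, matching the revised enum below (assumes a
/// `rainy_sdk::models` re-export):
///
/// ```ignore
/// use rainy_sdk::models::{ToolChoice, ToolChoiceMode};
///
/// // Serializes as the bare string "auto", as the OpenAI wire format expects.
/// let auto = ToolChoice::Mode(ToolChoiceMode::Auto);
/// assert_eq!(serde_json::to_string(&auto).unwrap(), "\"auto\"");
/// ```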
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolChoice {
    /// A mode keyword, serialized as a bare string: "none" (no tool is called)
    /// or "auto" (the model chooses which tool to call, if any). Untagged unit
    /// variants would serialize as `null`, so the keywords live in a nested enum.
    Mode(ToolChoiceMode),
    /// A specific tool is called.
    Tool {
        r#type: ToolType,
        function: ToolFunction,
    },
}

/// The mode keywords accepted by `tool_choice`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ToolChoiceMode {
    /// No tool is called.
    None,
    /// The model chooses which tool to call.
    Auto,
}

/// Represents a tool function call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolFunction {
    /// The name of the function to call.
    pub name: String,
}

/// Configuration for thinking capabilities in Gemini 3 and 2.5 series models.
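///
/// # Examples
///
/// A sketch of configuring thinking per model family (assumes a
/// `rainy_sdk::models` re-export):
///
/// ```ignore
/// use rainy_sdk::models::{ThinkingConfig, ThinkingLevel};
///
/// // Gemini 3: discrete thinking levels.
/// let for_gemini_3 = ThinkingConfig::gemini_3(ThinkingLevel::High, true);
/// // Gemini 2.5: token budgets (-1 = dynamic, 0 = disabled).
/// let for_gemini_2_5 = ThinkingConfig::gemini_2_5(-1, true);
/// ```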
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ThinkingConfig {
    /// Whether to include thought summaries in the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level for Gemini 3 models (low, high for Pro; minimal, low, medium, high for Flash).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,

    /// The thinking budget for Gemini 2.5 models (number of thinking tokens).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,
}

/// Thinking levels for Gemini 3 models.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ThinkingLevel {
    /// Minimal thinking (Gemini 3 Flash only) - the model likely won't think.
    Minimal,
    /// Low thinking level - faster responses with basic reasoning.
    Low,
    /// Medium thinking level (Gemini 3 Flash only) - balanced reasoning and speed.
    Medium,
    /// High thinking level - deep reasoning for complex tasks (default).
    High,
}

/// Represents a content part that may include thought signatures.
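///
/// # Examples
///
/// A sketch of building a function-call part that carries a thought signature
/// back to the model on a later turn (all values are illustrative; assumes a
/// `rainy_sdk::models` re-export):
///
/// ```ignore
/// use rainy_sdk::models::ContentPart;
///
/// let call = ContentPart::function_call(
///     "get_weather",
///     serde_json::json!({ "city": "Paris" }),
/// )
/// .with_thought_signature("opaque-signature-from-previous-response");
/// ```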
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ContentPart {
    /// The text content of the part.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,

    /// Function call information if this part contains a function call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_call: Option<FunctionCall>,

    /// Function response information if this part contains a function response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_response: Option<FunctionResponse>,

    /// Indicates if this part contains thought content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<bool>,

    /// Encrypted thought signature for preserving reasoning context across turns.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought_signature: Option<String>,
}

/// Represents a function call in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionCall {
    /// The name of the function being called.
    pub name: String,
    /// The arguments for the function call as a JSON object.
    pub args: serde_json::Value,
}

/// Represents a function response in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionResponse {
    /// The name of the function that was called.
    pub name: String,
    /// The response from the function call.
    pub response: serde_json::Value,
}

/// Enhanced chat message that supports Gemini 3 thinking capabilities.
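///
/// # Examples
///
/// A sketch of a message mixing text and a function response (values are
/// illustrative; assumes a `rainy_sdk::models` re-export):
///
/// ```ignore
/// use rainy_sdk::models::{ContentPart, EnhancedChatMessage, MessageRole};
///
/// let message = EnhancedChatMessage::with_parts(
///     MessageRole::User,
///     vec![
///         ContentPart::text("Here is the tool result:"),
///         ContentPart::function_response(
///             "get_weather",
///             serde_json::json!({ "temp_c": 18 }),
///         ),
///     ],
/// );
/// ```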
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnhancedChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content parts of the message (supports text, function calls, and thought signatures).
    pub parts: Vec<ContentPart>,
}

/// Enhanced usage statistics that include thinking tokens.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedUsage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,
    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
    /// The number of thinking tokens used (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<u32>,
}

impl ThinkingConfig {
    /// Creates a new thinking configuration with default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a configuration for Gemini 3 models with the specified thinking level.
    ///
    /// # Arguments
    ///
    /// * `level` - The thinking level to use.
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn gemini_3(level: ThinkingLevel, include_thoughts: bool) -> Self {
        Self {
            thinking_level: Some(level),
            include_thoughts: Some(include_thoughts),
            thinking_budget: None,
        }
    }

    /// Creates a configuration for Gemini 2.5 models with the specified thinking budget.
    ///
    /// # Arguments
    ///
    /// * `budget` - The thinking budget (-1 for dynamic, 0 to disable, or a specific token count).
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn gemini_2_5(budget: i32, include_thoughts: bool) -> Self {
        Self {
            thinking_budget: Some(budget),
            include_thoughts: Some(include_thoughts),
            thinking_level: None,
        }
    }

    /// Creates a configuration optimized for complex reasoning tasks on Gemini 3 models.
    ///
    /// For Gemini 2.5 models, use `gemini_2_5(-1, true)` (a dynamic budget) instead:
    /// a request may not set both `thinking_level` and `thinking_budget`, so this
    /// preset only sets the Gemini 3 level.
    pub fn high_reasoning() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::High),
            include_thoughts: Some(true),
            thinking_budget: None,
        }
    }

    /// Creates a configuration optimized for fast responses on Gemini 3 models.
    ///
    /// For Gemini 2.5 models, use `gemini_2_5(512, false)` or a similarly small
    /// budget instead, for the same reason as `high_reasoning`.
    pub fn fast_response() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::Low),
            include_thoughts: Some(false),
            thinking_budget: None,
        }
    }
}

impl ContentPart {
    /// Creates a new text content part.
    pub fn text(content: impl Into<String>) -> Self {
        Self {
            text: Some(content.into()),
            function_call: None,
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function call content part.
    pub fn function_call(name: impl Into<String>, args: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: Some(FunctionCall {
                name: name.into(),
                args,
            }),
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function response content part.
    pub fn function_response(name: impl Into<String>, response: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: None,
            function_response: Some(FunctionResponse {
                name: name.into(),
                response,
            }),
            thought: None,
            thought_signature: None,
        }
    }

    /// Adds a thought signature to this content part.
    pub fn with_thought_signature(mut self, signature: impl Into<String>) -> Self {
        self.thought_signature = Some(signature.into());
        self
    }

    /// Marks this content part as containing thought content.
    pub fn as_thought(mut self) -> Self {
        self.thought = Some(true);
        self
    }
}

impl EnhancedChatMessage {
    /// Creates a new enhanced message with the `System` role.
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `User` role.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `Assistant` role.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with multiple content parts.
    pub fn with_parts(role: MessageRole, parts: Vec<ContentPart>) -> Self {
        Self { role, parts }
    }
}

/// Represents a streaming chat completion response (OpenAI delta format).
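///
/// # Examples
///
/// A sketch of accumulating streamed deltas into the full assistant reply
/// (the chunk source is illustrative; assumes a `rainy_sdk::models` re-export):
///
/// ```ignore
/// use rainy_sdk::models::ChatCompletionStreamResponse;
///
/// fn collect_content(chunks: &[ChatCompletionStreamResponse]) -> String {
///     chunks
///         .iter()
///         .flat_map(|chunk| &chunk.choices)
///         .filter_map(|choice| choice.delta.content.as_deref())
///         .collect()
/// }
/// ```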
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamResponse {
    /// A unique identifier for the chat completion.
    pub id: String,
    /// The type of object, which is always "chat.completion.chunk".
    pub object: String,
    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,
    /// The model that was used for the completion.
    pub model: String,
    /// A list of chat completion choices.
    pub choices: Vec<ChatCompletionStreamChoice>,
    /// Information about the token usage for this completion (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
    /// The delta containing the new content for this choice.
    pub delta: ChatCompletionStreamDelta,
    /// The reason the model stopped generating tokens (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}

/// Represents the delta (change) in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamDelta {
    /// The role of the message (only present in the first chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    /// The new content for this chunk.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// Tool calls for this chunk (if any).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
}

/// Represents a tool call in a streaming response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// The index of the tool call.
    pub index: u32,
    /// The ID of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// The type of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#type: Option<String>,
    /// The function being called.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function: Option<ToolCallFunction>,
}

/// Represents a function call in a tool call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallFunction {
    /// The name of the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// The arguments for the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arguments: Option<String>,
}
1239}