rainy_sdk/models.rs
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Represents a single message in a chat conversation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content of the message.
    pub content: String,
}

/// The role of a message's author.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum MessageRole {
    /// A message from the system, setting the context or instructions for the assistant.
    System,
    /// A message from the user.
    User,
    /// A message from the assistant.
    Assistant,
}

/// The search provider to use for web research.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "lowercase")]
pub enum ResearchProvider {
    /// Use Exa (formerly Metaphor) for high-quality semantic search.
    #[default]
    Exa,
    /// Use Tavily for comprehensive web search and content extraction.
    Tavily,
    /// Automatically select the best provider based on the query.
    Auto,
}

/// The depth of the research operation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "lowercase")]
pub enum ResearchDepth {
    /// Basic search (faster, lower cost).
    #[default]
    Basic,
    /// Deep search (more thorough, higher cost, includes more context).
    Advanced,
}

/// Represents a request to create a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionRequest {
    /// The identifier of the model to use for the completion (e.g., "gpt-4o", "claude-sonnet-4").
    pub model: String,

    /// A list of messages that form the conversation history.
    pub messages: Vec<ChatMessage>,

    /// The sampling temperature to use, between 0.0 and 2.0. Higher values will make the output
    /// more random, while lower values will make it more focused and deterministic.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// The maximum number of tokens to generate in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,

    /// The nucleus sampling parameter. The model considers the results of the tokens with `top_p`
    /// probability mass. So, 0.1 means only the tokens comprising the top 10% probability mass are considered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// A penalty applied to new tokens based on their frequency in the text so far.
    /// It decreases the model's likelihood to repeat the same line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,

    /// A penalty applied to new tokens based on whether they appear in the text so far.
    /// It increases the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,

    /// A list of sequences that will cause the model to stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,

    /// A unique identifier representing your end-user, which can help in monitoring and
    /// tracking conversations.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,

    /// A hint to the router about which provider to use for the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,

    /// If set to `true`, the response will be streamed as a series of events.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

    /// Modify the likelihood of specified tokens appearing in the completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logit_bias: Option<serde_json::Value>,

    /// Whether to return log probabilities of the output tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<bool>,

    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<u32>,

    /// How many chat completion choices to generate for each input message.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub n: Option<u32>,

    /// An object specifying the format that the model must output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,

    /// A list of tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,

    /// Controls which (if any) tool is called by the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,

    /// Configuration for thinking capabilities (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_config: Option<ThinkingConfig>,
}

/// Represents the response from a chat completion request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,

    /// The type of object, which is always "chat.completion".
    pub object: String,

    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,

    /// The model that was used for the completion.
    pub model: String,

    /// A list of chat completion choices.
    pub choices: Vec<ChatChoice>,

    /// Information about the token usage for this completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,

    /// The message generated by the model.
    pub message: ChatMessage,

    /// The reason the model stopped generating tokens.
    pub finish_reason: String,
}

/// Represents the token usage statistics for a chat completion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,

    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,

    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
}

/// Represents the health status of the Rainy API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthStatus {
    /// The overall status of the API (e.g., "healthy", "degraded").
    pub status: String,

    /// The timestamp of when the health check was performed.
    pub timestamp: String,

    /// The uptime of the system in seconds.
    pub uptime: f64,

    /// The status of individual services.
    pub services: ServiceStatus,
}

/// Represents the status of individual backend services.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceStatus {
    /// The status of the database connection.
    pub database: bool,

    /// The status of the Redis connection, if applicable.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub redis: Option<bool>,

    /// The overall status of the connections to AI providers.
    pub providers: bool,
}

/// Represents the available models and providers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AvailableModels {
    /// A map where keys are provider names and values are lists of model names.
    pub providers: HashMap<String, Vec<String>>,

    /// The total number of available models across all providers.
    pub total_models: usize,

    /// A list of provider names that are currently active and available.
    pub active_providers: Vec<String>,
}

/// Represents information about credit usage for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditInfo {
    /// The number of credits available before the request.
    pub current_credits: f64,

    /// The estimated number of credits that the request will cost.
    pub estimated_cost: f64,

    /// The estimated number of credits remaining after the request.
    pub credits_after_request: f64,

    /// The date when the credit balance is next scheduled to be reset.
    pub reset_date: String,
}

/// Represents metadata extracted from the response headers of an API request.
#[derive(Debug, Clone)]
pub struct RequestMetadata {
    /// The time taken for the request to complete, in milliseconds.
    pub response_time: Option<u64>,

    /// The AI provider that handled the request.
    pub provider: Option<String>,

    /// The number of tokens used in the request.
    pub tokens_used: Option<u32>,

    /// The number of credits used for the request.
    pub credits_used: Option<f64>,

    /// The number of credits remaining after the request.
    pub credits_remaining: Option<f64>,

    /// The unique ID of the request, for tracking and debugging.
    pub request_id: Option<String>,
}

/// A collection of predefined model constants for convenience.
/// All models listed here are confirmed to be 100% OpenAI-compatible without parameter adaptations.
pub mod model_constants {
    // OpenAI models (fully compatible)
    /// Constant for the GPT-4o model.
    pub const OPENAI_GPT_4O: &str = "gpt-4o";
    /// Constant for the GPT-5 model.
    pub const OPENAI_GPT_5: &str = "gpt-5";
    /// Constant for the GPT-5 Pro model.
    pub const OPENAI_GPT_5_PRO: &str = "gpt-5-pro";
    /// Constant for the O3 model.
    pub const OPENAI_O3: &str = "o3";
    /// Constant for the O4 Mini model.
    pub const OPENAI_O4_MINI: &str = "o4-mini";

    // Google Gemini models (fully compatible via official compatibility layer)
    /// Constant for the Gemini 2.5 Pro model.
    pub const GOOGLE_GEMINI_2_5_PRO: &str = "gemini-2.5-pro";
    /// Constant for the Gemini 2.5 Flash model.
    pub const GOOGLE_GEMINI_2_5_FLASH: &str = "gemini-2.5-flash";
    /// Constant for the Gemini 2.5 Flash Lite model.
    pub const GOOGLE_GEMINI_2_5_FLASH_LITE: &str = "gemini-2.5-flash-lite";

    // Gemini 3 series - Advanced reasoning models with thinking capabilities
    /// Constant for the Gemini 3 Pro model with advanced reasoning.
    pub const GOOGLE_GEMINI_3_PRO: &str = "gemini-3-pro-preview";
    /// Constant for the Gemini 3 Flash model with thinking capabilities.
    pub const GOOGLE_GEMINI_3_FLASH: &str = "gemini-3-flash-preview";
    /// Constant for the Gemini 3 Pro Image model with multimodal reasoning.
    pub const GOOGLE_GEMINI_3_PRO_IMAGE: &str = "gemini-3-pro-image-preview";

    // Groq models (fully compatible)
    /// Constant for the Llama 3.1 8B Instant model.
    pub const GROQ_LLAMA_3_1_8B_INSTANT: &str = "llama-3.1-8b-instant";
    /// Constant for the Llama 3.3 70B Versatile model.
    pub const GROQ_LLAMA_3_3_70B_VERSATILE: &str = "llama-3.3-70b-versatile";
    /// Constant for the Moonshot AI Kimi K2 Instruct (0905) model.
    pub const KIMI_K2_0905: &str = "moonshotai/kimi-k2-instruct-0905";

    // Cerebras models (fully compatible)
    /// Constant for the Llama3.1 8B model.
    pub const CEREBRAS_LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";

    // Enosis Labs models (fully compatible)
    /// Constant for the Astronomer 1 model.
    pub const ASTRONOMER_1: &str = "astronomer-1";
    /// Constant for the Astronomer 1 Max model.
    pub const ASTRONOMER_1_MAX: &str = "astronomer-1-max";
    /// Constant for the Astronomer 1.5 model.
    pub const ASTRONOMER_1_5: &str = "astronomer-1.5";
    /// Constant for the Astronomer 2 model.
    pub const ASTRONOMER_2: &str = "astronomer-2";
    /// Constant for the Astronomer 2 Pro model.
    pub const ASTRONOMER_2_PRO: &str = "astronomer-2-pro";

    // Legacy aliases for backward compatibility (deprecated - use provider-prefixed versions above)
    /// Legacy constant for the GPT-4o model (use OPENAI_GPT_4O instead).
    #[deprecated(note = "Use OPENAI_GPT_4O instead for OpenAI compatibility")]
    pub const GPT_4O: &str = "openai/gpt-4o";
    /// Legacy constant for the GPT-5 model (use OPENAI_GPT_5 instead).
    #[deprecated(note = "Use OPENAI_GPT_5 instead for OpenAI compatibility")]
    pub const GPT_5: &str = "openai/gpt-5";
    /// Legacy constant for the Gemini 2.5 Pro model (use GOOGLE_GEMINI_2_5_PRO instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_PRO instead for OpenAI compatibility")]
    pub const GEMINI_2_5_PRO: &str = "google/gemini-2.5-pro";
    /// Legacy constant for the Gemini 2.5 Flash model (use GOOGLE_GEMINI_2_5_FLASH instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH: &str = "google/gemini-2.5-flash";
    /// Legacy constant for the Gemini 2.5 Flash Lite model (use GOOGLE_GEMINI_2_5_FLASH_LITE instead).
    #[deprecated(note = "Use GOOGLE_GEMINI_2_5_FLASH_LITE instead for OpenAI compatibility")]
    pub const GEMINI_2_5_FLASH_LITE: &str = "google/gemini-2.5-flash-lite";
    /// Legacy constant for the Llama 3.1 8B Instant model (use GROQ_LLAMA_3_1_8B_INSTANT instead).
    #[deprecated(note = "Use GROQ_LLAMA_3_1_8B_INSTANT instead for OpenAI compatibility")]
    pub const LLAMA_3_1_8B_INSTANT: &str = "groq/llama-3.1-8b-instant";
    /// Legacy constant for the Llama3.1 8B model (use CEREBRAS_LLAMA3_1_8B instead).
    #[deprecated(note = "Use CEREBRAS_LLAMA3_1_8B instead for OpenAI compatibility")]
    pub const LLAMA3_1_8B: &str = "cerebras/llama3.1-8b";
}

/// A collection of predefined provider name constants for convenience.
pub mod providers {
    /// Constant for the OpenAI provider.
    pub const OPENAI: &str = "openai";
    /// Constant for the Anthropic provider.
    pub const ANTHROPIC: &str = "anthropic";
    /// Constant for the Groq provider.
    pub const GROQ: &str = "groq";
    /// Constant for the Cerebras provider.
    pub const CEREBRAS: &str = "cerebras";
    /// Constant for the Gemini provider.
    pub const GEMINI: &str = "gemini";
    /// Constant for the Enosis Labs provider.
    pub const ENOSISLABS: &str = "enosislabs";
}

impl ChatCompletionRequest {
    /// Creates a new `ChatCompletionRequest` with the given model and messages.
    ///
    /// # Arguments
    ///
    /// * `model` - The identifier of the model to use.
    /// * `messages` - The list of messages for the conversation.
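    ///
    /// # Examples
    ///
    /// A minimal sketch of building a request with the builder methods defined
    /// below (assuming this module is exposed as `rainy_sdk::models`):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let request = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("Hello!")])
    ///     .with_temperature(0.7)
    ///     .with_max_tokens(256);
    /// assert_eq!(request.model, "gpt-4o");
    /// assert_eq!(request.temperature, Some(0.7));
    /// ```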
    pub fn new(model: impl Into<String>, messages: Vec<ChatMessage>) -> Self {
        Self {
            model: model.into(),
            messages,
            temperature: None,
            max_tokens: None,
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
            stop: None,
            user: None,
            provider: None,
            stream: None,
            logit_bias: None,
            logprobs: None,
            top_logprobs: None,
            n: None,
            response_format: None,
            tools: None,
            tool_choice: None,
            thinking_config: None,
        }
    }

    /// Sets the temperature for the chat completion.
    ///
    /// The temperature is clamped between 0.0 and 2.0.
    ///
    /// # Arguments
    ///
    /// * `temperature` - The sampling temperature.
    pub fn with_temperature(mut self, temperature: f32) -> Self {
        self.temperature = Some(temperature.clamp(0.0, 2.0));
        self
    }

    /// Sets the maximum number of tokens to generate.
    ///
    /// # Arguments
    ///
    /// * `max_tokens` - The maximum number of tokens.
    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
        self.max_tokens = Some(max_tokens);
        self
    }

    /// Sets the user identifier for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `user` - A unique identifier for the end-user.
    pub fn with_user(mut self, user: impl Into<String>) -> Self {
        self.user = Some(user.into());
        self
    }

    /// Sets a provider hint for the request.
    ///
    /// # Arguments
    ///
    /// * `provider` - The name of the provider to use.
    pub fn with_provider(mut self, provider: impl Into<String>) -> Self {
        self.provider = Some(provider.into());
        self
    }

    /// Enables or disables streaming for the response.
    ///
    /// # Arguments
    ///
    /// * `stream` - `true` to enable streaming, `false` to disable.
    pub fn with_stream(mut self, stream: bool) -> Self {
        self.stream = Some(stream);
        self
    }

    /// Sets the logit bias for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `logit_bias` - A map of token IDs to bias values.
    pub fn with_logit_bias(mut self, logit_bias: serde_json::Value) -> Self {
        self.logit_bias = Some(logit_bias);
        self
    }

    /// Enables or disables log probabilities for the response.
    ///
    /// # Arguments
    ///
    /// * `logprobs` - `true` to include log probabilities.
    pub fn with_logprobs(mut self, logprobs: bool) -> Self {
        self.logprobs = Some(logprobs);
        self
    }

    /// Sets the number of most likely tokens to return at each position.
    ///
    /// # Arguments
    ///
    /// * `top_logprobs` - The number of top log probabilities to return.
    pub fn with_top_logprobs(mut self, top_logprobs: u32) -> Self {
        self.top_logprobs = Some(top_logprobs);
        self
    }

    /// Sets the number of chat completion choices to generate.
    ///
    /// # Arguments
    ///
    /// * `n` - The number of completions to generate.
    pub fn with_n(mut self, n: u32) -> Self {
        self.n = Some(n);
        self
    }

    /// Sets the response format for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `response_format` - The format the model must output.
    pub fn with_response_format(mut self, response_format: ResponseFormat) -> Self {
        self.response_format = Some(response_format);
        self
    }

    /// Sets the tools available to the model.
    ///
    /// # Arguments
    ///
    /// * `tools` - A list of tools the model can use.
    pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
        self.tools = Some(tools);
        self
    }

    /// Sets the tool choice for the chat completion.
    ///
    /// # Arguments
    ///
    /// * `tool_choice` - Controls which tool the model uses.
    pub fn with_tool_choice(mut self, tool_choice: ToolChoice) -> Self {
        self.tool_choice = Some(tool_choice);
        self
    }

    /// Sets the thinking configuration for Gemini 3 and 2.5 series models.
    ///
    /// # Arguments
    ///
    /// * `thinking_config` - Configuration for thinking capabilities.
    pub fn with_thinking_config(mut self, thinking_config: ThinkingConfig) -> Self {
        self.thinking_config = Some(thinking_config);
        self
    }

    /// Enables thought summaries in the response (Gemini 3 and 2.5 series).
    ///
    /// # Arguments
    ///
    /// * `include_thoughts` - Whether to include thought summaries.
    pub fn with_include_thoughts(mut self, include_thoughts: bool) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.include_thoughts = Some(include_thoughts);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking level for Gemini 3 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_level` - The thinking level (minimal, low, medium, high).
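    ///
    /// # Examples
    ///
    /// A sketch of requesting deep reasoning from a Gemini 3 model (assuming
    /// this module is exposed as `rainy_sdk::models`):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage, ThinkingLevel};
    ///
    /// let request = ChatCompletionRequest::new(
    ///     "gemini-3-pro-preview",
    ///     vec![ChatMessage::user("Prove that the square root of 2 is irrational.")],
    /// )
    /// .with_thinking_level(ThinkingLevel::High);
    /// assert!(request.thinking_config.is_some());
    /// ```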
    pub fn with_thinking_level(mut self, thinking_level: ThinkingLevel) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_level = Some(thinking_level);
        self.thinking_config = Some(config);
        self
    }

    /// Sets the thinking budget for Gemini 2.5 models.
    ///
    /// # Arguments
    ///
    /// * `thinking_budget` - Number of thinking tokens (-1 for dynamic, 0 to disable).
    pub fn with_thinking_budget(mut self, thinking_budget: i32) -> Self {
        let mut config = self.thinking_config.unwrap_or_default();
        config.thinking_budget = Some(thinking_budget);
        self.thinking_config = Some(config);
        self
    }

    /// Validates that the request parameters are compatible with OpenAI standards.
    ///
    /// This method checks parameter ranges and values to ensure they match OpenAI's API specifications.
    /// Also validates Gemini 3 specific parameters like thinking configuration.
    ///
    /// # Returns
    ///
    /// A `Result` indicating whether the request is valid for OpenAI compatibility.
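    ///
    /// # Examples
    ///
    /// A sketch of catching an out-of-range parameter before sending the request
    /// (assuming this module is exposed as `rainy_sdk::models`):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatCompletionRequest, ChatMessage};
    ///
    /// let mut request = ChatCompletionRequest::new("gpt-4o", vec![ChatMessage::user("Hi")]);
    /// request.top_p = Some(1.5); // outside OpenAI's 0.0..=1.0 range
    /// assert!(request.validate_openai_compatibility().is_err());
    ///
    /// request.top_p = Some(0.9);
    /// assert!(request.validate_openai_compatibility().is_ok());
    /// ```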
    pub fn validate_openai_compatibility(&self) -> Result<(), String> {
        // Validate temperature
        if let Some(temp) = self.temperature {
            if !(0.0..=2.0).contains(&temp) {
                return Err(format!(
                    "Temperature must be between 0.0 and 2.0, got {}",
                    temp
                ));
            }
        }

        // Validate top_p
        if let Some(top_p) = self.top_p {
            if !(0.0..=1.0).contains(&top_p) {
                return Err(format!("Top-p must be between 0.0 and 1.0, got {}", top_p));
            }
        }

        // Validate frequency_penalty
        if let Some(fp) = self.frequency_penalty {
            if !(-2.0..=2.0).contains(&fp) {
                return Err(format!(
                    "Frequency penalty must be between -2.0 and 2.0, got {}",
                    fp
                ));
            }
        }

        // Validate presence_penalty
        if let Some(pp) = self.presence_penalty {
            if !(-2.0..=2.0).contains(&pp) {
                return Err(format!(
                    "Presence penalty must be between -2.0 and 2.0, got {}",
                    pp
                ));
            }
        }

        // Validate max_tokens
        if let Some(mt) = self.max_tokens {
            if mt == 0 {
                return Err("Max tokens must be greater than 0".to_string());
            }
        }

        // Validate top_logprobs
        if let Some(tlp) = self.top_logprobs {
            if !(0..=20).contains(&tlp) {
                return Err(format!(
                    "Top logprobs must be between 0 and 20, got {}",
                    tlp
                ));
            }
        }

        // Validate n
        if let Some(n) = self.n {
            if n == 0 {
                return Err("n must be greater than 0".to_string());
            }
        }

        // Validate stop sequences
        if let Some(stop) = &self.stop {
            if stop.len() > 4 {
                return Err("Cannot have more than 4 stop sequences".to_string());
            }
            for seq in stop {
                if seq.is_empty() {
                    return Err("Stop sequences cannot be empty".to_string());
                }
                if seq.len() > 64 {
                    return Err("Stop sequences cannot be longer than 64 characters".to_string());
                }
            }
        }

        // Validate thinking configuration for Gemini models
        if let Some(thinking_config) = &self.thinking_config {
            self.validate_thinking_config(thinking_config)?;
        }

        Ok(())
    }

    /// Validates thinking configuration parameters for Gemini models.
    fn validate_thinking_config(&self, config: &ThinkingConfig) -> Result<(), String> {
        let is_gemini_3 = self.model.contains("gemini-3");
        let is_gemini_2_5 = self.model.contains("gemini-2.5");
        let is_gemini_3_pro = self.model.contains("gemini-3-pro");

        // Validate thinking level (Gemini 3 only)
        if let Some(level) = &config.thinking_level {
            if !is_gemini_3 {
                return Err("thinking_level is only supported for Gemini 3 models".to_string());
            }

            match level {
                ThinkingLevel::Minimal | ThinkingLevel::Medium => {
                    if is_gemini_3_pro {
                        return Err(
                            "Gemini 3 Pro only supports 'low' and 'high' thinking levels"
                                .to_string(),
                        );
                    }
                }
                _ => {}
            }
        }

        // Validate thinking budget (Gemini 2.5 only)
        if let Some(budget) = config.thinking_budget {
            if !is_gemini_2_5 {
                return Err("thinking_budget is only supported for Gemini 2.5 models".to_string());
            }

            // Validate budget ranges based on model
            if self.model.contains("2.5-pro") {
                if budget != -1 && !(128..=32768).contains(&budget) {
                    return Err(
                        "Gemini 2.5 Pro thinking budget must be -1 (dynamic) or between 128-32768"
                            .to_string(),
                    );
                }
            } else if self.model.contains("2.5-flash")
                && budget != -1
                && !(0..=24576).contains(&budget)
            {
                return Err(
                    "Gemini 2.5 Flash thinking budget must be -1 (dynamic) or between 0-24576"
                        .to_string(),
                );
            }
        }

        // Reject conflicting parameters: thinking_level targets Gemini 3 and
        // thinking_budget targets Gemini 2.5, so they must not be combined.
        if config.thinking_level.is_some() && config.thinking_budget.is_some() {
            return Err("Cannot specify both thinking_level (Gemini 3) and thinking_budget (Gemini 2.5) in the same request".to_string());
        }

        Ok(())
    }

    /// Checks if the model supports thinking capabilities.
    pub fn supports_thinking(&self) -> bool {
        self.model.contains("gemini-3") || self.model.contains("gemini-2.5")
    }

    /// Checks if the model requires thought signatures for function calling.
    pub fn requires_thought_signatures(&self) -> bool {
        self.model.contains("gemini-3")
    }
}

impl ChatMessage {
    /// Creates a new message with the `System` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the system message.
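    ///
    /// # Examples
    ///
    /// A sketch of assembling a short conversation with the role helpers
    /// (assuming this module is exposed as `rainy_sdk::models`):
    ///
    /// ```
    /// use rainy_sdk::models::{ChatMessage, MessageRole};
    ///
    /// let messages = vec![
    ///     ChatMessage::system("You are a helpful assistant."),
    ///     ChatMessage::user("What is the capital of France?"),
    /// ];
    /// assert_eq!(messages[0].role, MessageRole::System);
    /// ```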
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            content: content.into(),
        }
    }

    /// Creates a new message with the `User` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the user message.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            content: content.into(),
        }
    }

    /// Creates a new message with the `Assistant` role.
    ///
    /// # Arguments
    ///
    /// * `content` - The content of the assistant message.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            content: content.into(),
        }
    }
}

// Legacy compatibility types - keep existing types for backward compatibility
use uuid::Uuid;

/// Represents a user account (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct User {
    /// The unique ID of the user.
    pub id: Uuid,
    /// The user's identifier string.
    pub user_id: String,
    /// The name of the user's subscription plan.
    pub plan_name: String,
    /// The user's current credit balance.
    pub current_credits: f64,
    /// The amount of credits the user has used in the current month.
    pub credits_used_this_month: f64,
    /// The date when the user's credits will reset.
    pub credits_reset_date: DateTime<Utc>,
    /// Indicates if the user account is active.
    pub is_active: bool,
    /// The timestamp of when the user account was created.
    pub created_at: DateTime<Utc>,
}

/// Represents an API key (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiKey {
    /// The unique ID of the API key.
    pub id: Uuid,
    /// The API key string.
    pub key: String,
    /// The ID of the user who owns the key.
    pub owner_id: Uuid,
    /// Indicates if the API key is active.
    pub is_active: bool,
    /// The timestamp of when the key was created.
    pub created_at: DateTime<Utc>,
    /// The expiration date of the key, if any.
    pub expires_at: Option<DateTime<Utc>>,
    /// A description of the key.
    pub description: Option<String>,
    /// The timestamp of when the key was last used.
    pub last_used_at: Option<DateTime<Utc>>,
}

/// Represents usage statistics over a period (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageStats {
    /// The number of days in the usage period.
    pub period_days: u32,
    /// A list of daily usage data.
    pub daily_usage: Vec<DailyUsage>,
    /// A list of recent credit transactions.
    pub recent_transactions: Vec<CreditTransaction>,
    /// The total number of requests made in the period.
    pub total_requests: u64,
    /// The total number of tokens used in the period.
    pub total_tokens: u64,
}

/// Represents usage data for a single day (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DailyUsage {
    /// The date for the usage data.
    pub date: String,
    /// The number of credits used on this day.
    pub credits_used: f64,
    /// The number of requests made on this day.
    pub requests: u64,
    /// The number of tokens used on this day.
    pub tokens: u64,
}

/// Represents a single credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreditTransaction {
    /// The unique ID of the transaction.
    pub id: Uuid,
    /// The type of the transaction.
    pub transaction_type: TransactionType,
    /// The amount of credits involved in the transaction.
    pub credits_amount: f64,
    /// The credit balance after the transaction.
    pub credits_balance_after: f64,
    /// The provider associated with the transaction, if any.
    pub provider: Option<String>,
    /// The model associated with the transaction, if any.
    pub model: Option<String>,
    /// A description of the transaction.
    pub description: String,
    /// The timestamp of when the transaction occurred.
    pub created_at: DateTime<Utc>,
}

/// The type of credit transaction (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TransactionType {
    /// A transaction for API usage.
    Usage,
    /// A transaction for a credit reset.
    Reset,
    /// A transaction for a credit purchase.
    Purchase,
    /// A transaction for a credit refund.
    Refund,
}

// Legacy aliases for backward compatibility
/// A legacy type alias for `MessageRole`.
pub type ChatRole = MessageRole;
/// A legacy type alias for `Usage`.
pub type ChatUsage = Usage;
/// A legacy type alias for `HealthStatus`.
pub type HealthCheck = HealthStatus;

/// Represents the status of backend services (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthServices {
    /// The status of the database connection.
    pub database: bool,
    /// The status of the Redis connection.
    pub redis: bool,
    /// The overall status of AI providers.
    pub providers: bool,
}

/// The health status of the API (legacy).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HealthStatusEnum {
    /// The API is healthy.
    Healthy,
    /// The API is in a degraded state.
    Degraded,
    /// The API is unhealthy.
    Unhealthy,
    /// The API needs initialization.
    NeedsInit,
}

/// Represents the format that the model must output.
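///
/// Internally tagged with `"type"` so it serializes to OpenAI's `response_format`
/// wire shape. A quick sketch of the expected JSON (assuming this module is
/// exposed as `rainy_sdk::models`):
///
/// ```
/// use rainy_sdk::models::ResponseFormat;
///
/// let json = serde_json::to_string(&ResponseFormat::JsonObject).unwrap();
/// assert_eq!(json, r#"{"type":"json_object"}"#);
/// ```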
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseFormat {
    /// The model can return text.
    Text,
    /// The model must return a valid JSON object.
    JsonObject,
    /// The model must return a JSON object that matches the provided schema.
    JsonSchema {
        /// The JSON Schema that the model's output must conform to.
        json_schema: serde_json::Value,
    },
}

/// Represents a tool that the model can use.
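///
/// # Examples
///
/// A sketch of declaring a function tool; the `get_weather` schema is
/// illustrative (assuming this module is exposed as `rainy_sdk::models`):
///
/// ```
/// use rainy_sdk::models::{FunctionDefinition, Tool, ToolType};
///
/// let weather_tool = Tool {
///     r#type: ToolType::Function,
///     function: FunctionDefinition {
///         name: "get_weather".to_string(),
///         description: Some("Look up the current weather for a city.".to_string()),
///         parameters: Some(serde_json::json!({
///             "type": "object",
///             "properties": { "city": { "type": "string" } },
///             "required": ["city"]
///         })),
///     },
/// };
/// assert_eq!(weather_tool.function.name, "get_weather");
/// ```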
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
    /// The type of the tool (currently only "function" is supported).
    pub r#type: ToolType,
    /// The function definition describing the tool's capabilities.
    pub function: FunctionDefinition,
}

/// The type of tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolType {
    /// A function tool.
    Function,
}

/// Represents a function definition for a tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDefinition {
    /// The name of the function.
    pub name: String,
    /// A description of what the function does.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// The parameters the function accepts, described as a JSON Schema object.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parameters: Option<serde_json::Value>,
}

/// Controls which tool is called by the model.
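///
/// `None` and `Auto` serialize as the strings `"none"` and `"auto"`, while the
/// `Tool` variant serializes as a bare object, matching OpenAI's `tool_choice`
/// field. A quick sketch (assuming this module is exposed as `rainy_sdk::models`):
///
/// ```
/// use rainy_sdk::models::ToolChoice;
///
/// assert_eq!(serde_json::to_string(&ToolChoice::Auto).unwrap(), r#""auto""#);
/// ```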
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ToolChoice {
    /// No tool is called.
    None,
    /// The model chooses which tool to call.
    Auto,
    /// A specific tool is called.
    ///
    /// Marked `#[serde(untagged)]` (serde >= 1.0.181) so it serializes as the
    /// bare object rather than wrapped in a variant tag; a fully untagged enum
    /// would serialize the unit variants `None` and `Auto` as `null`.
    #[serde(untagged)]
    Tool {
        /// The type of the tool being called.
        r#type: ToolType,
        /// The function to call within the tool.
        function: ToolFunction,
    },
}

/// Represents a tool function call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolFunction {
    /// The name of the function to call.
    pub name: String,
}

/// Configuration for thinking capabilities in Gemini 3 and 2.5 series models.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ThinkingConfig {
    /// Whether to include thought summaries in the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub include_thoughts: Option<bool>,

    /// The thinking level for Gemini 3 models (low, high for Pro; minimal, low, medium, high for Flash).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_level: Option<ThinkingLevel>,

    /// The thinking budget for Gemini 2.5 models (number of thinking tokens).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking_budget: Option<i32>,
}

/// Thinking levels for Gemini 3 models.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ThinkingLevel {
    /// Minimal thinking (Gemini 3 Flash only) - model likely won't think.
    Minimal,
    /// Low thinking level - faster responses with basic reasoning.
    Low,
    /// Medium thinking level (Gemini 3 Flash only) - balanced reasoning and speed.
    Medium,
    /// High thinking level - deep reasoning for complex tasks (default).
    High,
}

/// Represents a content part that may include thought signatures.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ContentPart {
    /// The text content of the part.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,

    /// Function call information if this part contains a function call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_call: Option<FunctionCall>,

    /// Function response information if this part contains a function response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_response: Option<FunctionResponse>,

    /// Indicates if this part contains thought content.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought: Option<bool>,

    /// Encrypted thought signature for preserving reasoning context across turns.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thought_signature: Option<String>,
}

/// Represents a function call in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionCall {
    /// The name of the function being called.
    pub name: String,
    /// The arguments for the function call as a JSON object.
    pub args: serde_json::Value,
}

/// Represents a function response in the content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FunctionResponse {
    /// The name of the function that was called.
    pub name: String,
    /// The response from the function call.
    pub response: serde_json::Value,
}

/// Enhanced chat message that supports Gemini 3 thinking capabilities.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EnhancedChatMessage {
    /// The role of the message author.
    pub role: MessageRole,
    /// The content parts of the message (supports text, function calls, and thought signatures).
    pub parts: Vec<ContentPart>,
}

/// Enhanced usage statistics that include thinking tokens.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedUsage {
    /// The number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// The number of tokens in the generated completion.
    pub completion_tokens: u32,
    /// The total number of tokens used in the request (prompt + completion).
    pub total_tokens: u32,
    /// The number of thinking tokens used (Gemini 3 and 2.5 series).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thoughts_token_count: Option<u32>,
}

impl ThinkingConfig {
    /// Creates a new thinking configuration with default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a configuration for Gemini 3 models with specified thinking level.
    ///
    /// # Arguments
    ///
    /// * `level` - The thinking level to use.
    /// * `include_thoughts` - Whether to include thought summaries.
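    ///
    /// # Examples
    ///
    /// A sketch (assuming this module is exposed as `rainy_sdk::models`):
    ///
    /// ```
    /// use rainy_sdk::models::{ThinkingConfig, ThinkingLevel};
    ///
    /// let config = ThinkingConfig::gemini_3(ThinkingLevel::Low, true);
    /// assert_eq!(config.include_thoughts, Some(true));
    /// assert!(config.thinking_budget.is_none());
    /// ```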
    pub fn gemini_3(level: ThinkingLevel, include_thoughts: bool) -> Self {
        Self {
            thinking_level: Some(level),
            include_thoughts: Some(include_thoughts),
            thinking_budget: None,
        }
    }

    /// Creates a configuration for Gemini 2.5 models with specified thinking budget.
    ///
    /// # Arguments
    ///
    /// * `budget` - The thinking budget (-1 for dynamic, 0 to disable, or specific token count).
    /// * `include_thoughts` - Whether to include thought summaries.
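    ///
    /// # Examples
    ///
    /// A sketch of a dynamic thinking budget (assuming this module is exposed
    /// as `rainy_sdk::models`):
    ///
    /// ```
    /// use rainy_sdk::models::ThinkingConfig;
    ///
    /// let config = ThinkingConfig::gemini_2_5(-1, false);
    /// assert_eq!(config.thinking_budget, Some(-1));
    /// assert!(config.thinking_level.is_none());
    /// ```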
    pub fn gemini_2_5(budget: i32, include_thoughts: bool) -> Self {
        Self {
            thinking_budget: Some(budget),
            include_thoughts: Some(include_thoughts),
            thinking_level: None,
        }
    }

    /// Creates a configuration optimized for complex reasoning tasks.
    ///
    /// Note: this preset sets both `thinking_level` (Gemini 3) and
    /// `thinking_budget` (Gemini 2.5) so it can serve either family; clear the
    /// field that does not apply before calling
    /// `ChatCompletionRequest::validate_openai_compatibility`, which rejects
    /// requests that carry both.
    pub fn high_reasoning() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::High),
            include_thoughts: Some(true),
            thinking_budget: Some(-1), // Dynamic for 2.5 models
        }
    }

    /// Creates a configuration optimized for fast responses.
    ///
    /// The same caveat as [`ThinkingConfig::high_reasoning`] applies: prune the
    /// field that does not match your model family before validating.
    pub fn fast_response() -> Self {
        Self {
            thinking_level: Some(ThinkingLevel::Low),
            include_thoughts: Some(false),
            thinking_budget: Some(512), // Low budget for 2.5 models
        }
    }
}

impl ContentPart {
    /// Creates a new text content part.
    pub fn text(content: impl Into<String>) -> Self {
        Self {
            text: Some(content.into()),
            function_call: None,
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function call content part.
    pub fn function_call(name: impl Into<String>, args: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: Some(FunctionCall {
                name: name.into(),
                args,
            }),
            function_response: None,
            thought: None,
            thought_signature: None,
        }
    }

    /// Creates a new function response content part.
    pub fn function_response(name: impl Into<String>, response: serde_json::Value) -> Self {
        Self {
            text: None,
            function_call: None,
            function_response: Some(FunctionResponse {
                name: name.into(),
                response,
            }),
            thought: None,
            thought_signature: None,
        }
    }

    /// Adds a thought signature to this content part.
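    ///
    /// # Examples
    ///
    /// A sketch of echoing back a signature from a previous model turn; the
    /// signature value is illustrative (assuming this module is exposed as
    /// `rainy_sdk::models`):
    ///
    /// ```
    /// use rainy_sdk::models::ContentPart;
    ///
    /// let part = ContentPart::text("Planning the next step...")
    ///     .with_thought_signature("opaque-signature-from-previous-turn")
    ///     .as_thought();
    /// assert_eq!(part.thought, Some(true));
    /// ```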
    pub fn with_thought_signature(mut self, signature: impl Into<String>) -> Self {
        self.thought_signature = Some(signature.into());
        self
    }

    /// Marks this content part as containing thought content.
    pub fn as_thought(mut self) -> Self {
        self.thought = Some(true);
        self
    }
}

impl EnhancedChatMessage {
    /// Creates a new enhanced message with the `System` role.
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::System,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `User` role.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::User,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with the `Assistant` role.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: MessageRole::Assistant,
            parts: vec![ContentPart::text(content)],
        }
    }

    /// Creates a new enhanced message with multiple content parts.
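    ///
    /// # Examples
    ///
    /// A sketch of an assistant turn that mixes text with a function call; the
    /// `get_weather` call is illustrative (assuming this module is exposed as
    /// `rainy_sdk::models`):
    ///
    /// ```
    /// use rainy_sdk::models::{ContentPart, EnhancedChatMessage, MessageRole};
    ///
    /// let message = EnhancedChatMessage::with_parts(
    ///     MessageRole::Assistant,
    ///     vec![
    ///         ContentPart::text("Let me check the weather."),
    ///         ContentPart::function_call("get_weather", serde_json::json!({ "city": "Paris" })),
    ///     ],
    /// );
    /// assert_eq!(message.parts.len(), 2);
    /// ```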
    pub fn with_parts(role: MessageRole, parts: Vec<ContentPart>) -> Self {
        Self { role, parts }
    }
}

/// Represents a streaming chat completion response (OpenAI delta format).
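///
/// # Examples
///
/// A sketch of decoding one SSE `data:` payload into a chunk; the JSON here is
/// a hand-written sample (assuming this module is exposed as `rainy_sdk::models`):
///
/// ```
/// use rainy_sdk::models::ChatCompletionStreamResponse;
///
/// let data = r#"{
///     "id": "chatcmpl-123",
///     "object": "chat.completion.chunk",
///     "created": 1700000000,
///     "model": "gpt-4o",
///     "choices": [{ "index": 0, "delta": { "content": "Hel" } }]
/// }"#;
/// let chunk: ChatCompletionStreamResponse = serde_json::from_str(data).unwrap();
/// assert_eq!(chunk.choices[0].delta.content.as_deref(), Some("Hel"));
/// ```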
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamResponse {
    /// A unique identifier for the chat completion.
    pub id: String,
    /// The type of object, which is always "chat.completion.chunk".
    pub object: String,
    /// The Unix timestamp (in seconds) of when the completion was created.
    pub created: u64,
    /// The model that was used for the completion.
    pub model: String,
    /// A list of chat completion choices.
    pub choices: Vec<ChatCompletionStreamChoice>,
    /// Information about the token usage for this completion (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,
}

/// Represents a single choice in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
    /// The delta containing the new content for this choice.
    pub delta: ChatCompletionStreamDelta,
    /// The reason the model stopped generating tokens (only present in the final chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}

/// Represents the delta (change) in a streaming chat completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionStreamDelta {
    /// The role of the message (only present in the first chunk).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    /// The new content for this chunk.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// Tool calls for this chunk (if any).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
}

/// Represents a tool call in a streaming response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// The index of the tool call.
    pub index: u32,
    /// The ID of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// The type of the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#type: Option<String>,
    /// The function being called.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function: Option<ToolCallFunction>,
}

/// Represents a function call in a tool call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallFunction {
    /// The name of the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// The arguments for the function.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub arguments: Option<String>,
}