// ultrafast_models_sdk/client.rs
1//! # Ultrafast Client Module
2//!
3//! This module provides the main client implementation for the Ultrafast Models SDK.
4//! It includes both standalone and gateway modes, with comprehensive provider
5//! management, routing, caching, and error handling.
6//!
7//! ## Overview
8//!
9//! The client module provides:
10//! - **Dual Mode Operation**: Standalone and gateway modes
11//! - **Provider Management**: Multiple AI provider integration
12//! - **Intelligent Routing**: Automatic provider selection
13//! - **Circuit Breakers**: Automatic failover and recovery
14//! - **Caching Layer**: Response caching for performance
15//! - **Retry Logic**: Configurable retry policies
16//! - **Metrics Collection**: Performance monitoring
17//! - **Streaming Support**: Real-time response streaming
18//!
19//! ## Client Modes
20//!
21//! ### Standalone Mode
22//!
23//! Direct communication with AI providers:
24//!
25//! ```rust
26//! use ultrafast_models_sdk::{UltrafastClient, ChatRequest, Message};
27//!
28//! let client = UltrafastClient::standalone()
29//! .with_openai("your-openai-key")
30//! .with_anthropic("your-anthropic-key")
31//! .with_routing_strategy(RoutingStrategy::LoadBalance {
32//! weights: vec![0.6, 0.4],
33//! })
34//! .build()?;
35//!
36//! let response = client.chat_completion(ChatRequest {
37//! model: "gpt-4".to_string(),
38//! messages: vec![Message::user("Hello!")],
39//! ..Default::default()
40//! }).await?;
41//! ```
42//!
43//! ### Gateway Mode
44//!
45//! Communication through the Ultrafast Gateway:
46//!
47//! ```rust
48//! let client = UltrafastClient::gateway("http://localhost:3000")
49//! .with_api_key("your-gateway-key")
50//! .with_timeout(Duration::from_secs(30))
51//! .build()?;
52//!
53//! let response = client.chat_completion(request).await?;
54//! ```
55//!
56//! ## Provider Integration
57//!
58//! The client supports multiple providers:
59//!
60//! - **OpenAI**: GPT-4, GPT-3.5, and other models
61//! - **Anthropic**: Claude-3, Claude-2, Claude Instant
62//! - **Google**: Gemini Pro, Gemini Pro Vision, PaLM
63//! - **Azure OpenAI**: Azure-hosted OpenAI models
64//! - **Ollama**: Local and remote Ollama instances
65//! - **Mistral AI**: Mistral 7B, Mixtral models
66//! - **Cohere**: Command, Command R models
67//! - **Custom Providers**: Extensible provider system
68//!
69//! ## Routing Strategies
70//!
71//! Multiple routing strategies for provider selection:
72//!
73//! - **Single**: Route all requests to one provider
74//! - **Load Balance**: Distribute requests across providers
75//! - **Failover**: Primary provider with automatic fallback
76//! - **Conditional**: Route based on request characteristics
77//! - **A/B Testing**: Route for testing different providers
78//!
79//! ## Circuit Breakers
80//!
81//! Automatic failover and recovery mechanisms:
82//!
83//! - **Closed State**: Normal operation
84//! - **Open State**: Provider failing, requests blocked
85//! - **Half-Open State**: Testing if provider recovered
86//! - **Automatic Recovery**: Automatic state transitions
87//!
88//! ## Caching
89//!
90//! Built-in response caching:
91//!
92//! - **In-Memory Cache**: Fast local caching
93//! - **Redis Cache**: Distributed caching
94//! - **Automatic TTL**: Configurable cache expiration
95//! - **Cache Keys**: Intelligent cache key generation
96//!
97//! ## Retry Logic
98//!
99//! Configurable retry policies:
100//!
101//! - **Exponential Backoff**: Smart retry delays
102//! - **Max Retries**: Configurable retry limits
103//! - **Retryable Errors**: Automatic retry on specific errors
104//! - **Jitter**: Randomized retry delays to prevent thundering herd
105//!
106//! ## Performance Features
107//!
108//! - **Connection Pooling**: Reusable HTTP connections
109//! - **Request Batching**: Batch multiple requests
110//! - **Compression**: Automatic request/response compression
111//! - **Async Operations**: Non-blocking I/O throughout
112//! - **Memory Efficiency**: Minimal memory footprint
113//!
114//! ## Error Handling
115//!
116//! Comprehensive error handling with specific error types:
117//!
118//! - **Authentication Errors**: Invalid API keys or tokens
119//! - **Rate Limit Errors**: Exceeded rate limits with retry info
120//! - **Provider Errors**: Provider-specific error messages
121//! - **Network Errors**: Connection and timeout issues
122//! - **Validation Errors**: Invalid request parameters
123//!
124//! ## Configuration
125//!
126//! Highly configurable client behavior:
127//!
128//! - **Timeouts**: Per-request and per-provider timeouts
129//! - **Rate Limits**: Per-provider rate limiting
130//! - **Circuit Breakers**: Failure thresholds and recovery settings
131//! - **Caching**: Cache TTL and size limits
132//! - **Logging**: Structured logging configuration
133//!
134//! ## Examples
135//!
136//! ### Basic Usage
137//!
138//! ```rust
139//! use ultrafast_models_sdk::{UltrafastClient, ChatRequest, Message};
140//!
141//! #[tokio::main]
142//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
143//! let client = UltrafastClient::standalone()
144//! .with_openai("your-key")
145//! .build()?;
146//!
147//! let request = ChatRequest {
148//! model: "gpt-4".to_string(),
149//! messages: vec![Message::user("Hello, world!")],
150//! ..Default::default()
151//! };
152//!
153//! let response = client.chat_completion(request).await?;
154//! println!("Response: {}", response.choices[0].message.content);
155//! Ok(())
156//! }
157//! ```
158//!
159//! ### Multi-Provider Setup
160//!
161//! ```rust
162//! let client = UltrafastClient::standalone()
163//! .with_openai("openai-key")
164//! .with_anthropic("anthropic-key")
165//! .with_google("google-key", "project-id")
166//! .with_ollama("http://localhost:11434")
167//! .with_routing_strategy(RoutingStrategy::LoadBalance {
168//! weights: vec![0.4, 0.3, 0.2, 0.1],
169//! })
170//! .build()?;
171//! ```
172//!
173//! ### Advanced Configuration
174//!
175//! ```rust
176//! use std::time::Duration;
177//! use ultrafast_models_sdk::{UltrafastClient, ClientConfig};
178//!
179//! let config = ClientConfig {
180//! timeout: Duration::from_secs(30),
181//! max_retries: 5,
182//! retry_delay: Duration::from_secs(1),
183//! user_agent: Some("MyApp/1.0".to_string()),
184//! ..Default::default()
185//! };
186//!
187//! let client = UltrafastClient::standalone()
188//! .with_config(config)
189//! .with_openai("your-key")
190//! .build()?;
191//! ```
192//!
193//! ### Circuit Breaker Configuration
194//!
195//! ```rust
196//! use ultrafast_models_sdk::circuit_breaker::CircuitBreakerConfig;
197//!
198//! let circuit_config = CircuitBreakerConfig {
199//! failure_threshold: 5,
200//! recovery_timeout: Duration::from_secs(60),
201//! request_timeout: Duration::from_secs(30),
202//! half_open_max_calls: 3,
203//! };
204//!
205//! let client = UltrafastClient::standalone()
206//! .with_openai("your-key")
207//! .with_circuit_breaker_config(circuit_config)
208//! .build()?;
209//! ```
210//!
211//! ### Caching Configuration
212//!
213//! ```rust
214//! use ultrafast_models_sdk::cache::CacheConfig;
215//!
//! let cache_config = CacheConfig {
//!     enabled: true,
//!     ttl: Duration::from_secs(3600), // one hour; std `Duration` has no `from_hours`
//!     max_size: 1000,
//!     backend: CacheBackend::Memory,
//! };
222//!
223//! let client = UltrafastClient::standalone()
224//! .with_cache_config(cache_config)
225//! .with_openai("your-key")
226//! .build()?;
227//! ```
228//!
229//! ## Testing
230//!
231//! The client includes testing utilities:
232//!
233//! ```rust
234//! #[cfg(test)]
235//! mod tests {
236//! use super::*;
237//! use tokio_test;
238//!
//!     #[tokio::test]
240//! async fn test_client_creation() {
241//! let client = UltrafastClient::standalone()
242//! .with_openai("test-key")
243//! .build();
244//! assert!(client.is_ok());
245//! }
246//!
//!     #[tokio::test]
248//! async fn test_chat_completion() {
249//! let client = UltrafastClient::standalone()
250//! .with_openai("test-key")
251//! .build()
252//! .unwrap();
253//!
254//! let request = ChatRequest {
255//! model: "gpt-4".to_string(),
256//! messages: vec![Message::user("Hello")],
257//! ..Default::default()
258//! };
259//!
260//! let result = client.chat_completion(request).await;
261//! // Handle result based on test environment
262//! }
263//! }
264//! ```
265//!
266//! ## Performance Tips
267//!
268//! For optimal performance:
269//!
270//! - **Use Connection Pooling**: Configure appropriate pool sizes
271//! - **Enable Caching**: Cache responses for repeated requests
272//! - **Configure Timeouts**: Set appropriate timeouts for your use case
273//! - **Use Streaming**: For long responses, use streaming endpoints
274//! - **Batch Requests**: Group multiple requests when possible
275//!
276//! ## Migration from Other SDKs
277//!
278//! ### From OpenAI SDK
279//!
280//! ```rust
281//! // Before
282//! use openai::Client;
283//! let client = Client::new("your-key");
284//! let response = client.chat().create(request).await?;
285//!
286//! // After
287//! use ultrafast_models_sdk::UltrafastClient;
288//! let client = UltrafastClient::standalone()
289//! .with_openai("your-key")
290//! .build()?;
291//! let response = client.chat_completion(request).await?;
292//! ```
293//!
294//! ### From Anthropic SDK
295//!
296//! ```rust
297//! // Before
298//! use anthropic::Client;
299//! let client = Client::new("your-key");
300//! let response = client.messages().create(request).await?;
301//!
302//! // After
303//! use ultrafast_models_sdk::UltrafastClient;
304//! let client = UltrafastClient::standalone()
305//! .with_anthropic("your-key")
306//! .build()?;
307//! let response = client.chat_completion(request).await?;
308//! ```
309//!
310//! ## Troubleshooting
311//!
312//! Common issues and solutions:
313//!
314//! ### Authentication Errors
315//! - Verify API keys are correct
316//! - Check API key permissions
317//! - Ensure proper provider configuration
318//!
319//! ### Rate Limit Issues
320//! - Implement exponential backoff
321//! - Use multiple API keys
322//! - Configure appropriate rate limits
323//!
324//! ### Connection Issues
325//! - Check network connectivity
326//! - Verify provider endpoints
327//! - Configure appropriate timeouts
328//!
329//! ## Contributing
330//!
331//! We welcome contributions! Please see our contributing guide for details on:
332//!
333//! - Code style and formatting
334//! - Testing requirements
335//! - Documentation standards
336//! - Pull request process
337
338use crate::cache::{Cache, CacheConfig, CacheKeyBuilder, InMemoryCache};
339use crate::error::ClientError;
340use crate::models::{
341 AudioRequest, AudioResponse, ChatRequest, ChatResponse, EmbeddingRequest, EmbeddingResponse,
342 ImageRequest, ImageResponse, SpeechRequest, SpeechResponse, StreamChunk,
343};
344use crate::providers::{
345 create_provider_with_circuit_breaker, Provider, ProviderConfig, ProviderMetrics,
346};
347use crate::routing::{Router, RoutingContext, RoutingStrategy};
348use futures::{Stream, StreamExt};
349use reqwest::Client;
350use std::collections::HashMap;
351use std::sync::Arc;
352use std::time::{Duration, Instant};
353use tokio::sync::RwLock;
354
/// Client operation mode.
///
/// Defines whether the client talks to AI providers directly (standalone)
/// or forwards every request through the Ultrafast Gateway.
///
/// # Example
///
/// ```rust
/// let standalone_mode = ClientMode::Standalone;
/// let gateway_mode = ClientMode::Gateway {
///     base_url: "http://localhost:3000".to_string(),
/// };
/// ```
#[derive(Debug, Clone)]
pub enum ClientMode {
    /// Direct communication with AI providers
    Standalone,
    /// Communication through the Ultrafast Gateway at `base_url`
    Gateway { base_url: String },
}
375
376/// The main client for interacting with multiple AI/LLM providers.
377///
378/// The `UltrafastClient` provides a unified interface to multiple AI providers
379/// with intelligent routing, circuit breakers, caching, and comprehensive error handling.
380///
381/// # Modes
382///
383/// The client supports two operation modes:
384///
385/// - **Standalone Mode**: Direct communication with AI providers
386/// - **Gateway Mode**: Communication through the Ultrafast Gateway
387///
388/// # Features
389///
390/// - **Multi-Provider Support**: Integrate with OpenAI, Anthropic, Google, and more
391/// - **Intelligent Routing**: Automatic provider selection and load balancing
392/// - **Circuit Breakers**: Automatic failover and recovery
393/// - **Response Caching**: Built-in caching for performance
394/// - **Rate Limiting**: Per-provider rate limiting
395/// - **Retry Logic**: Configurable retry policies with exponential backoff
396/// - **Performance Metrics**: Real-time provider performance tracking
397/// - **Streaming Support**: Real-time response streaming
398///
399/// # Examples
400///
401/// ## Basic Usage
402///
403/// ```rust
404/// use ultrafast_models_sdk::{UltrafastClient, ChatRequest, Message};
405///
406/// #[tokio::main]
407/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
408/// let client = UltrafastClient::standalone()
409/// .with_openai("your-openai-key")
410/// .build()?;
411///
412/// let request = ChatRequest {
413/// model: "gpt-4".to_string(),
414/// messages: vec![Message::user("Hello, world!")],
415/// ..Default::default()
416/// };
417///
418/// let response = client.chat_completion(request).await?;
419/// println!("Response: {}", response.choices[0].message.content);
420/// Ok(())
421/// }
422/// ```
423///
424/// ## Multi-Provider Setup
425///
426/// ```rust
427/// let client = UltrafastClient::standalone()
428/// .with_openai("openai-key")
429/// .with_anthropic("anthropic-key")
430/// .with_google("google-key", "project-id")
/// .with_routing_strategy(RoutingStrategy::LoadBalance {
///     weights: vec![0.5, 0.3, 0.2], // one weight per configured provider
/// })
434/// .build()?;
435/// ```
436///
437/// ## Gateway Mode
438///
439/// ```rust
440/// let client = UltrafastClient::gateway("http://localhost:3000")
441/// .with_api_key("your-gateway-key")
442/// .with_timeout(Duration::from_secs(30))
443/// .build()?;
444/// ```
445///
446/// # Thread Safety
447///
448/// The client is thread-safe and can be shared across threads using `Arc<UltrafastClient>`.
449///
450/// # Performance
451///
452/// - **Latency**: <1ms routing overhead
453/// - **Throughput**: 10,000+ requests/second
454/// - **Memory**: <100MB under normal load
455/// - **Concurrency**: 100,000+ concurrent requests
456///
457/// # Error Handling
458///
459/// The client provides comprehensive error handling with specific error types:
460///
461/// - `AuthenticationError`: Invalid API keys or tokens
462/// - `RateLimitExceeded`: Exceeded rate limits with retry information
463/// - `ProviderError`: Provider-specific error messages
464/// - `NetworkError`: Connection and timeout issues
465/// - `ValidationError`: Invalid request parameters
466///
467/// # Circuit Breakers
468///
469/// Each provider has an independent circuit breaker that automatically:
470///
471/// - Opens when failure threshold is reached
472/// - Prevents requests to failing providers
473/// - Tests recovery with limited requests
474/// - Automatically closes when provider recovers
475///
476/// # Caching
477///
478/// The client supports multiple caching backends:
479///
480/// - **In-Memory Cache**: Fast local caching (default)
481/// - **Redis Cache**: Distributed caching for multiple instances
482/// - **Custom Backends**: Extensible cache system
483///
484/// # Rate Limiting
485///
486/// Per-provider rate limiting with:
487///
488/// - Request-based limits (requests per minute/hour)
489/// - Token-based limits (tokens per minute)
490/// - Burst handling with configurable burst sizes
491/// - Automatic retry with exponential backoff
492///
493/// # Metrics
494///
495/// Real-time performance metrics including:
496///
497/// - Provider response times
498/// - Success/failure rates
499/// - Circuit breaker status
500/// - Cache hit rates
501/// - Rate limit usage
502///
503/// # Configuration
504///
505/// The client is highly configurable with:
506///
507/// - Per-provider timeouts and retry policies
508/// - Global and per-provider rate limits
509/// - Circuit breaker thresholds and recovery settings
510/// - Cache TTL and size limits
511/// - Connection pool sizes and timeouts
512///
513/// # Best Practices
514///
515/// - Use connection pooling for high-throughput applications
516/// - Enable caching for repeated requests
517/// - Configure appropriate timeouts for your use case
518/// - Use streaming for long responses
519/// - Monitor circuit breaker status
520/// - Implement proper error handling and retry logic
521///
522/// # See Also
523///
524/// - [`UltrafastClientBuilder`] - For building client instances
525/// - [`Provider`] - For custom provider implementations
526/// - [`Router`] - For custom routing strategies
527/// - [`Cache`] - For custom caching backends
#[allow(dead_code)]
pub struct UltrafastClient {
    /// Client operation mode (standalone or gateway)
    mode: ClientMode,
    /// Provider instances keyed by provider id (used in standalone mode)
    providers: HashMap<String, Arc<dyn Provider>>,
    /// Router used to select a provider for each request
    router: Arc<RwLock<Router>>,
    /// Optional response cache; `None` disables caching entirely
    cache: Option<Arc<dyn Cache>>,
    /// Per-provider performance metrics, keyed by provider id
    metrics: Arc<RwLock<HashMap<String, ProviderMetrics>>>,
    /// HTTP client used for gateway-mode requests
    http_client: Client,
    /// API key sent to the gateway (gateway mode only)
    api_key: Option<String>,
    /// Overall request timeout
    timeout: Duration,
    /// Retry/backoff configuration applied to provider calls
    retry_policy: RetryPolicy,
    /// Pool of reusable HTTP connections, keyed by host
    connection_pool: Arc<RwLock<ConnectionPool>>,
    /// Id of the provider that served the most recent request (for metrics)
    last_used_provider: Arc<RwLock<Option<String>>>,
}
553
/// Retry policy configuration.
///
/// Defines how the client retries failed requests: exponential backoff
/// between attempts, capped at a maximum delay, plus jitter to avoid
/// thundering-herd retries from many clients at once.
///
/// # Example
///
/// ```rust
/// let policy = RetryPolicy {
///     max_retries: 3,
///     initial_delay: Duration::from_millis(100),
///     max_delay: Duration::from_secs(10),
///     backoff_multiplier: 2.0,
///     jitter_factor: 0.1,
/// };
/// ```
#[derive(Debug, Clone)]
pub struct RetryPolicy {
    /// Maximum number of retry attempts (not counting the initial call)
    pub max_retries: u32,
    /// Delay before the first retry
    pub initial_delay: Duration,
    /// Upper bound on the delay between retries
    pub max_delay: Duration,
    /// Multiplier applied to the delay after each failed attempt
    pub backoff_multiplier: f64,
    /// Jitter as a fraction of the current delay (e.g. 0.1 = up to 10%)
    pub jitter_factor: f64,
}
583
584impl Default for RetryPolicy {
585 fn default() -> Self {
586 Self {
587 max_retries: 3,
588 initial_delay: Duration::from_millis(100),
589 max_delay: Duration::from_secs(10),
590 backoff_multiplier: 2.0,
591 jitter_factor: 0.1, // 10% jitter
592 }
593 }
594}
595
/// Connection pool for HTTP connections.
///
/// Manages reusable HTTP clients to reduce connection setup overhead.
/// The map holds at most one pooled client per host.
///
/// # Thread Safety
///
/// The pool itself is not synchronized; callers wrap it in a lock
/// (see `UltrafastClient::connection_pool`, an `Arc<RwLock<...>>`).
#[derive(Debug)]
pub struct ConnectionPool {
    /// Pooled connections, keyed by host (one entry per host)
    connections: HashMap<String, PooledConnection>,
    /// Maximum connections per host
    // NOTE(review): the limit is currently compared against the TOTAL number
    // of pooled entries in `get_or_create_connection` — confirm intent.
    max_connections_per_host: usize,
    /// Timeout applied to requests made through pooled clients
    connection_timeout: Duration,
    /// How long an unused connection is kept before eviction
    idle_timeout: Duration,
}
615
/// A pooled HTTP connection.
///
/// Wraps a `reqwest::Client` together with usage statistics used for
/// idle-eviction decisions.
#[derive(Debug)]
pub struct PooledConnection {
    /// HTTP client for this connection
    client: Client,
    /// Last time this connection was handed out (drives idle eviction)
    last_used: Instant,
    /// Number of requests served through this connection
    request_count: u64,
}
628
629impl ConnectionPool {
630 /// Create a new connection pool.
631 ///
632 /// # Arguments
633 ///
634 /// * `max_connections_per_host` - Maximum connections per host
635 /// * `connection_timeout` - Connection timeout
636 /// * `idle_timeout` - Idle connection timeout
637 ///
638 /// # Returns
639 ///
640 /// Returns a new `ConnectionPool` instance.
641 pub fn new(
642 max_connections_per_host: usize,
643 connection_timeout: Duration,
644 idle_timeout: Duration,
645 ) -> Self {
646 Self {
647 connections: HashMap::new(),
648 max_connections_per_host,
649 connection_timeout,
650 idle_timeout,
651 }
652 }
653
654 /// Get or create a connection for a host.
655 ///
656 /// Returns an existing connection if available, or creates a new one
657 /// if needed. Automatically cleans up idle connections.
658 ///
659 /// # Arguments
660 ///
661 /// * `host` - The host to get a connection for
662 ///
663 /// # Returns
664 ///
665 /// Returns a `Client` for the specified host.
666 ///
667 /// # Errors
668 ///
669 /// Returns an error if the connection cannot be created.
670 pub fn get_or_create_connection(&mut self, host: &str) -> Result<Client, ClientError> {
671 let now = Instant::now();
672
673 // Clean up idle connections
674 self.cleanup_idle_connections(now);
675
676 // Check if we have an existing connection
677 if let Some(connection) = self.connections.get_mut(host) {
678 connection.last_used = now;
679 connection.request_count += 1;
680 return Ok(connection.client.clone());
681 }
682
683 // Create new connection if under limit
684 if self.connections.len() < self.max_connections_per_host {
685 let client = Client::builder()
686 .timeout(self.connection_timeout)
687 .pool_max_idle_per_host(10)
688 .pool_idle_timeout(self.idle_timeout)
689 .build()
690 .map_err(|e| ClientError::Configuration {
691 message: format!("Failed to create HTTP client: {e}"),
692 })?;
693
694 let pooled_connection = PooledConnection {
695 client: client.clone(),
696 last_used: now,
697 request_count: 1,
698 };
699
700 self.connections.insert(host.to_string(), pooled_connection);
701 Ok(client)
702 } else {
703 Err(ClientError::Configuration {
704 message: "Connection pool exhausted".to_string(),
705 })
706 }
707 }
708
709 fn cleanup_idle_connections(&mut self, now: Instant) {
710 let idle_connections: Vec<String> = self
711 .connections
712 .iter()
713 .filter(|(_, conn)| now.duration_since(conn.last_used) > self.idle_timeout)
714 .map(|(host, _)| host.clone())
715 .collect();
716
717 for host in &idle_connections {
718 self.connections.remove(host);
719 }
720
721 if !idle_connections.is_empty() {
722 tracing::debug!("Cleaned up {} idle connections", idle_connections.len());
723 }
724 }
725}
726
727impl UltrafastClient {
    /// Entry point for the generic builder (choose a mode via the builder).
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> UltrafastClientBuilder {
        UltrafastClientBuilder::default()
    }
732
    /// Start building a standalone-mode client (direct provider calls).
    pub fn standalone() -> StandaloneClientBuilder {
        StandaloneClientBuilder::default()
    }
736
737 pub fn gateway(base_url: String) -> GatewayClientBuilder {
738 GatewayClientBuilder::new(base_url)
739 }
740
    /// Send a chat completion request, dispatching on the client mode.
    ///
    /// In standalone mode the request is routed to a configured provider
    /// (with caching, retries and metrics); in gateway mode it is forwarded
    /// to the gateway over HTTP.
    ///
    /// # Errors
    ///
    /// Returns a `ClientError` when routing fails, the provider call fails
    /// after retries, or the gateway request fails.
    pub async fn chat_completion(&self, request: ChatRequest) -> Result<ChatResponse, ClientError> {
        match &self.mode {
            ClientMode::Standalone => self.standalone_chat_completion(request).await,
            ClientMode::Gateway { .. } => self.gateway_chat_completion(request).await,
        }
    }
748
749 // Enhanced streaming with better error handling
750 pub async fn stream_chat_completion(
751 &self,
752 request: ChatRequest,
753 ) -> Result<Box<dyn Stream<Item = Result<StreamChunk, ClientError>> + Send + Unpin>, ClientError>
754 {
755 match &self.mode {
756 ClientMode::Standalone => {
757 let stream = self.standalone_stream_chat_completion(request).await?;
758 Ok(stream)
759 }
760 ClientMode::Gateway { .. } => {
761 let stream = self.gateway_stream_chat_completion(request).await?;
762 Ok(stream)
763 }
764 }
765 }
766
767 // Get the last used provider for metrics
768 pub async fn get_last_used_provider(&self) -> Option<String> {
769 let provider = self.last_used_provider.read().await;
770 provider.clone()
771 }
772
773 // Get circuit breaker state for a provider
774 pub async fn get_provider_circuit_state(
775 &self,
776 provider_id: &str,
777 ) -> Option<crate::circuit_breaker::CircuitState> {
778 // Try to get the provider and check its health status
779 if let Some(provider) = self.providers.get(provider_id) {
780 match provider.health_check().await {
781 Ok(_) => Some(crate::circuit_breaker::CircuitState::Closed),
782 Err(_) => Some(crate::circuit_breaker::CircuitState::Open),
783 }
784 } else {
785 None
786 }
787 }
788
789 // Check if a provider is healthy (circuit breaker is not open)
790 pub async fn is_provider_healthy(&self, provider_id: &str) -> bool {
791 match self.get_provider_circuit_state(provider_id).await {
792 Some(state) => state != crate::circuit_breaker::CircuitState::Open,
793 None => true, // Assume healthy if we can't determine state
794 }
795 }
796
797 // Get circuit breaker metrics for all providers
798 pub async fn get_circuit_breaker_metrics(
799 &self,
800 ) -> HashMap<String, crate::circuit_breaker::CircuitBreakerMetrics> {
801 let mut metrics = HashMap::new();
802
803 for provider_id in self.providers.keys() {
804 if let Some(provider) = self.providers.get(provider_id) {
805 // Create basic metrics based on health status
806 let state = match provider.health_check().await {
807 Ok(_) => crate::circuit_breaker::CircuitState::Closed,
808 Err(_) => crate::circuit_breaker::CircuitState::Open,
809 };
810
811 metrics.insert(
812 provider_id.clone(),
813 crate::circuit_breaker::CircuitBreakerMetrics {
814 name: provider_id.clone(),
815 state,
816 failure_count: 0,
817 success_count: 0,
818 last_failure_time: None,
819 last_success_time: None,
820 },
821 );
822 }
823 }
824
825 metrics
826 }
827
828 // Get health status for all providers
829 pub async fn get_provider_health_status(&self) -> HashMap<String, bool> {
830 let mut health_status = HashMap::new();
831
832 for provider_id in self.providers.keys() {
833 let is_healthy = self.is_provider_healthy(provider_id).await;
834 health_status.insert(provider_id.clone(), is_healthy);
835 }
836
837 health_status
838 }
839
    /// Create embeddings for the given input, dispatching on the client mode.
    pub async fn embedding(
        &self,
        request: EmbeddingRequest,
    ) -> Result<EmbeddingResponse, ClientError> {
        match &self.mode {
            ClientMode::Standalone => self.standalone_embedding(request).await,
            ClientMode::Gateway { .. } => self.gateway_embedding(request).await,
        }
    }
849
    /// Generate images from a prompt, dispatching on the client mode.
    pub async fn image_generation(
        &self,
        request: ImageRequest,
    ) -> Result<ImageResponse, ClientError> {
        match &self.mode {
            ClientMode::Standalone => self.standalone_image_generation(request).await,
            ClientMode::Gateway { .. } => self.gateway_image_generation(request).await,
        }
    }
859
    /// Transcribe audio to text, dispatching on the client mode.
    pub async fn audio_transcription(
        &self,
        request: AudioRequest,
    ) -> Result<AudioResponse, ClientError> {
        match &self.mode {
            ClientMode::Standalone => self.standalone_audio_transcription(request).await,
            ClientMode::Gateway { .. } => self.gateway_audio_transcription(request).await,
        }
    }
869
    /// Synthesize speech from text, dispatching on the client mode.
    pub async fn text_to_speech(
        &self,
        request: SpeechRequest,
    ) -> Result<SpeechResponse, ClientError> {
        match &self.mode {
            ClientMode::Standalone => self.standalone_text_to_speech(request).await,
            ClientMode::Gateway { .. } => self.gateway_text_to_speech(request).await,
        }
    }
879
    /// Standalone-mode chat completion: cache lookup, routing, retried
    /// provider call, metrics update, then cache fill.
    async fn standalone_chat_completion(
        &self,
        request: ChatRequest,
    ) -> Result<ChatResponse, ClientError> {
        // Only cache non-streaming requests, and only when a cache is configured.
        let cache_key = if self.cache.is_some() && !request.stream.unwrap_or(false) {
            Some(CacheKeyBuilder::build_chat_key(&request))
        } else {
            None
        };

        // Serve from cache when possible.
        if let Some(cache_key) = &cache_key {
            if let Some(cache) = &self.cache {
                if let Some(cached_response) = cache.get(cache_key) {
                    tracing::debug!("Cache hit for chat completion");
                    return Ok(cached_response.response);
                }
            }
        }

        // Build the routing context used for provider selection.
        // NOTE(review): this read guard stays held across the provider call
        // below; the streaming path drops its guard earlier — confirm intent.
        let router = self.router.read().await;
        let routing_context = RoutingContext {
            model: Some(request.model.clone()),
            user_region: None,
            request_size: serde_json::to_string(&request).unwrap_or_default().len() as u32,
            estimated_tokens: self.estimate_tokens(&request),
            user_id: None,
            metadata: HashMap::new(),
        };

        let provider_names: Vec<String> = self.providers.keys().cloned().collect();
        let provider_selection = router
            .select_provider(&provider_names, &routing_context)
            .ok_or_else(|| ClientError::Configuration {
                message: "No suitable provider found".to_string(),
            })?;

        // Record which provider is about to serve the request (for metrics).
        {
            let mut last_provider = self.last_used_provider.write().await;
            *last_provider = Some(provider_selection.provider_id.clone());
        }

        let provider = self
            .providers
            .get(&provider_selection.provider_id)
            .ok_or_else(|| ClientError::Configuration {
                message: format!("Provider {} not found", provider_selection.provider_id),
            })?;

        // Call the provider with retry/backoff and time the whole attempt.
        let start = Instant::now();
        let result = self
            .execute_with_enhanced_retry(
                || provider.chat_completion(request.clone()),
                &provider_selection.provider_id,
            )
            .await;

        let latency = start.elapsed();

        // Record success/failure, latency and estimated token usage.
        self.update_enhanced_metrics(
            &provider_selection.provider_id,
            result.is_ok(),
            latency.as_millis() as u64,
            self.estimate_tokens(&request),
            0.0, // Cost calculation would be provider-specific
        )
        .await;

        // Cache successful responses with a fixed 1-hour TTL.
        // NOTE(review): the TTL is passed both to `CachedResponse::new` and
        // to `cache.set` — confirm which one the cache backend honors.
        if let Ok(response) = &result {
            if let Some(cache_key) = &cache_key {
                if let Some(cache) = &self.cache {
                    let cached_response = crate::cache::CachedResponse::new(
                        response.clone(),
                        Duration::from_secs(3600),
                    );
                    cache.set(cache_key, cached_response, Duration::from_secs(3600));
                }
            }
        }

        Ok(result?)
    }
968
969 // Enhanced retry logic with exponential backoff and jitter
970 async fn execute_with_enhanced_retry<F, Fut, T>(
971 &self,
972 mut operation: F,
973 _provider_id: &str,
974 ) -> Result<T, crate::error::ProviderError>
975 where
976 F: FnMut() -> Fut,
977 Fut: std::future::Future<Output = Result<T, crate::error::ProviderError>>,
978 {
979 let mut attempt = 0;
980 let mut delay = self.retry_policy.initial_delay;
981
982 loop {
983 match operation().await {
984 Ok(result) => return Ok(result),
985 Err(error) => {
986 attempt += 1;
987
988 if attempt > self.retry_policy.max_retries || !self.should_retry(&error) {
989 return Err(error);
990 }
991
992 // Add jitter to prevent thundering herd
993 let jitter = delay.mul_f64(self.retry_policy.jitter_factor);
994 let actual_delay = delay + jitter;
995
996 tokio::time::sleep(actual_delay).await;
997
998 delay = std::cmp::min(
999 delay.mul_f64(self.retry_policy.backoff_multiplier),
1000 self.retry_policy.max_delay,
1001 );
1002 }
1003 }
1004 }
1005 }
1006
1007 // Enhanced error classification
1008 fn should_retry(&self, error: &crate::error::ProviderError) -> bool {
1009 matches!(
1010 error,
1011 crate::error::ProviderError::RateLimit
1012 | crate::error::ProviderError::ServiceUnavailable
1013 | crate::error::ProviderError::NetworkError { .. }
1014 | crate::error::ProviderError::Timeout
1015 )
1016 }
1017
    /// Record the outcome of one provider call in the shared metrics map.
    ///
    /// Creates a default `ProviderMetrics` entry on first use of a provider.
    /// `cost` is currently always 0.0 at the call site (see
    /// `standalone_chat_completion`), as per-provider cost is not computed.
    async fn update_enhanced_metrics(
        &self,
        provider_id: &str,
        success: bool,
        latency_ms: u64,
        tokens: u32,
        cost: f64,
    ) {
        let mut metrics = self.metrics.write().await;
        let provider_metrics = metrics.entry(provider_id.to_string()).or_default();

        provider_metrics.record_enhanced_request(success, latency_ms, tokens, cost);

        tracing::debug!(
            "Updated metrics for provider {}: success={}, latency={}ms, tokens={}, cost=${:.4}",
            provider_id,
            success,
            latency_ms,
            tokens,
            cost
        );
    }
1041
1042 // Enhanced token estimation
1043 fn estimate_tokens(&self, request: &ChatRequest) -> u32 {
1044 let mut total_tokens = 0;
1045
1046 for message in &request.messages {
1047 // Rough estimation: 1 token ≈ 4 characters
1048 total_tokens += message.content.len() as u32 / 4;
1049 }
1050
1051 // Add buffer for system messages and formatting
1052 total_tokens += 50;
1053
1054 total_tokens
1055 }
1056
    // Standalone mode implementation.
    //
    // Streams a chat completion directly from a router-selected provider.
    // Each chunk passing through the wrapped stream also fires off a
    // detached task that records a metrics sample for the provider.
    async fn standalone_stream_chat_completion(
        &self,
        request: ChatRequest,
    ) -> Result<Box<dyn Stream<Item = Result<StreamChunk, ClientError>> + Send + Unpin>, ClientError>
    {
        // Build the routing context used to pick a provider for this request.
        let router = self.router.read().await;
        let context = RoutingContext {
            model: Some(request.model.clone()),
            user_region: None,
            request_size: serde_json::to_string(&request).unwrap_or_default().len() as u32,
            estimated_tokens: self.estimate_tokens(&request),
            user_id: request.user.clone(),
            metadata: HashMap::new(),
        };

        let provider_ids: Vec<String> = self.providers.keys().cloned().collect();
        let selection = router
            .select_provider(&provider_ids, &context)
            .ok_or_else(|| ClientError::Routing {
                message: "No providers available".to_string(),
            })?;

        // Release the router read-lock before the (potentially long) provider call.
        drop(router);

        let provider =
            self.providers
                .get(&selection.provider_id)
                .ok_or_else(|| ClientError::Routing {
                    message: format!("Provider not found: {}", selection.provider_id),
                })?;

        // NOTE(review): this latency covers only stream *setup* (time until
        // the provider returns the stream handle), yet the same value is
        // re-recorded for every chunk below — confirm that is intended.
        let start_time = Instant::now();
        let stream = provider.stream_chat_completion(request).await?;
        let latency = start_time.elapsed();

        let metrics = self.metrics.clone();
        let provider_id = selection.provider_id.clone();

        let wrapped_stream = stream.map(move |chunk_result| {
            match chunk_result {
                Ok(chunk) => {
                    // Record the success off the hot path via a detached task
                    // (tokio::spawn, not spawn_blocking) so chunk delivery is
                    // never blocked on the metrics lock. One task per chunk.
                    let metrics_clone = metrics.clone();
                    let provider_id_clone = provider_id.clone();
                    let latency_ms = latency.as_millis() as u64;

                    tokio::spawn(async move {
                        let mut metrics_guard = metrics_clone.write().await;
                        if let Some(provider_metrics) = metrics_guard.get_mut(&provider_id_clone) {
                            provider_metrics.record_enhanced_request(true, latency_ms, 0, 0.0);
                        }
                    });

                    Ok(chunk)
                }
                Err(e) => {
                    // Same detached-task pattern for failed chunks.
                    let metrics_clone = metrics.clone();
                    let provider_id_clone = provider_id.clone();
                    let latency_ms = latency.as_millis() as u64;

                    tokio::spawn(async move {
                        let mut metrics_guard = metrics_clone.write().await;
                        if let Some(provider_metrics) = metrics_guard.get_mut(&provider_id_clone) {
                            provider_metrics.record_enhanced_request(false, latency_ms, 0, 0.0);
                        }
                    });

                    Err(ClientError::Provider(e))
                }
            }
        });

        Ok(Box::new(wrapped_stream))
    }
1133
1134 // Gateway mode implementation
1135 async fn gateway_chat_completion(
1136 &self,
1137 request: ChatRequest,
1138 ) -> Result<ChatResponse, ClientError> {
1139 let url = format!(
1140 "{}/v1/chat/completions",
1141 match &self.mode {
1142 ClientMode::Gateway { base_url } => base_url,
1143 _ => unreachable!(),
1144 }
1145 );
1146
1147 let response = self.gateway_request(url, request).await?;
1148 Ok(response)
1149 }
1150
1151 async fn gateway_stream_chat_completion(
1152 &self,
1153 mut request: ChatRequest,
1154 ) -> Result<Box<dyn Stream<Item = Result<StreamChunk, ClientError>> + Send + Unpin>, ClientError>
1155 {
1156 request.stream = Some(true);
1157 let url = format!(
1158 "{}/v1/chat/completions",
1159 match &self.mode {
1160 ClientMode::Gateway { base_url } => base_url,
1161 _ => unreachable!(),
1162 }
1163 );
1164
1165 let response = self
1166 .http_client
1167 .post(&url)
1168 .header(
1169 "Authorization",
1170 format!(
1171 "Bearer {}",
1172 self.api_key.as_ref().unwrap_or(&"".to_string())
1173 ),
1174 )
1175 .json(&request)
1176 .send()
1177 .await
1178 .map_err(|e| ClientError::NetworkError {
1179 message: e.to_string(),
1180 })?;
1181
1182 if !response.status().is_success() {
1183 return Err(ClientError::Provider(
1184 crate::error::ProviderError::ServiceUnavailable,
1185 ));
1186 }
1187
1188 let stream = response.bytes_stream().map(|chunk_result| {
1189 chunk_result
1190 .map_err(|e| ClientError::NetworkError {
1191 message: e.to_string(),
1192 })
1193 .and_then(|chunk| {
1194 // Parse SSE format
1195 let chunk_str = String::from_utf8_lossy(&chunk);
1196 if chunk_str.trim() == "data: [DONE]" {
1197 return Ok(StreamChunk {
1198 id: "".to_string(),
1199 object: "chat.completion.chunk".to_string(),
1200 created: 0,
1201 model: "".to_string(),
1202 choices: vec![],
1203 });
1204 }
1205
1206 if let Some(json_str) = chunk_str.strip_prefix("data: ") {
1207 serde_json::from_str::<StreamChunk>(json_str).map_err(|e| {
1208 ClientError::Serialization {
1209 message: e.to_string(),
1210 }
1211 })
1212 } else {
1213 Err(ClientError::Serialization {
1214 message: "Invalid SSE format".to_string(),
1215 })
1216 }
1217 })
1218 });
1219
1220 Ok(Box::new(stream))
1221 }
1222
    // Routes an embedding request to a router-selected provider and executes
    // it with retry, metrics recording, and cross-provider fallback on
    // transient failures.
    async fn standalone_embedding(
        &self,
        request: EmbeddingRequest,
    ) -> Result<EmbeddingResponse, ClientError> {
        // Route to appropriate provider
        let router = self.router.read().await;
        let routing_context = RoutingContext {
            model: Some(request.model.clone()),
            user_region: None,
            request_size: serde_json::to_string(&request).unwrap_or_default().len() as u32,
            estimated_tokens: 0, // Embeddings don't have token estimation
            user_id: None,
            metadata: HashMap::new(),
        };

        let provider_names: Vec<String> = self.providers.keys().cloned().collect();
        let provider_selection = router
            .select_provider(&provider_names, &routing_context)
            .ok_or_else(|| ClientError::Configuration {
                message: "No suitable provider found".to_string(),
            })?;

        // Track the last used provider for metrics
        {
            let mut last_provider = self.last_used_provider.write().await;
            *last_provider = Some(provider_selection.provider_id.clone());
        }

        let provider_id = provider_selection.provider_id;
        let provider =
            self.providers
                .get(&provider_id)
                .ok_or_else(|| ClientError::Configuration {
                    message: format!("Provider {provider_id} not found"),
                })?;

        // Execute with retry and fallback
        let result = self
            .execute_with_enhanced_retry(|| provider.embedding(request.clone()), &provider_id)
            .await;

        match result {
            Ok(response) => {
                // Update metrics. NOTE(review): latency/tokens/cost are
                // recorded as 0 for embeddings — confirm this is intended.
                self.update_enhanced_metrics(&provider_id, true, 0, 0, 0.0)
                    .await;
                Ok(response)
            }
            Err(error) => {
                // Update metrics
                self.update_enhanced_metrics(&provider_id, false, 0, 0, 0.0)
                    .await;

                // Try fallback providers (every provider except the failed one)
                if self.should_fallback(&error) {
                    let fallback_providers: Vec<String> = self
                        .providers
                        .keys()
                        .filter(|&id| id != &provider_id)
                        .cloned()
                        .collect();

                    if let Ok(response) = self
                        .try_fallback_providers_embedding(
                            &fallback_providers,
                            &provider_id,
                            request,
                        )
                        .await
                    {
                        return Ok(response);
                    }
                }

                // All fallbacks exhausted: surface the original provider error.
                Err(ClientError::Provider(error))
            }
        }
    }
1301
1302 async fn gateway_embedding(
1303 &self,
1304 request: EmbeddingRequest,
1305 ) -> Result<EmbeddingResponse, ClientError> {
1306 let url = format!("{}/v1/embeddings", self.base_url());
1307 self.gateway_request(url, request).await
1308 }
1309
    // Routes an image-generation request to a router-selected provider and
    // executes it with retry, metrics recording, and cross-provider fallback
    // on transient failures.
    async fn standalone_image_generation(
        &self,
        request: ImageRequest,
    ) -> Result<ImageResponse, ClientError> {
        // Route to appropriate provider
        let router = self.router.read().await;
        let routing_context = RoutingContext {
            model: request.model.clone(),
            user_region: None,
            request_size: serde_json::to_string(&request).unwrap_or_default().len() as u32,
            estimated_tokens: 0, // Image generation doesn't have token estimation
            user_id: None,
            metadata: HashMap::new(),
        };

        let provider_names: Vec<String> = self.providers.keys().cloned().collect();
        let provider_selection = router
            .select_provider(&provider_names, &routing_context)
            .ok_or_else(|| ClientError::Configuration {
                message: "No suitable provider found".to_string(),
            })?;

        // Track the last used provider for metrics
        {
            let mut last_provider = self.last_used_provider.write().await;
            *last_provider = Some(provider_selection.provider_id.clone());
        }

        let provider_id = provider_selection.provider_id;
        let provider =
            self.providers
                .get(&provider_id)
                .ok_or_else(|| ClientError::Configuration {
                    message: format!("Provider {provider_id} not found"),
                })?;

        // Execute with retry and fallback
        let result = self
            .execute_with_enhanced_retry(
                || provider.image_generation(request.clone()),
                &provider_id,
            )
            .await;

        match result {
            Ok(response) => {
                // Update metrics. NOTE(review): latency/tokens/cost are
                // recorded as 0 for image generation — confirm intended.
                self.update_enhanced_metrics(&provider_id, true, 0, 0, 0.0)
                    .await;
                Ok(response)
            }
            Err(error) => {
                // Update metrics
                self.update_enhanced_metrics(&provider_id, false, 0, 0, 0.0)
                    .await;

                // Try fallback providers (every provider except the failed one)
                if self.should_fallback(&error) {
                    let fallback_providers: Vec<String> = self
                        .providers
                        .keys()
                        .filter(|&id| id != &provider_id)
                        .cloned()
                        .collect();

                    if let Ok(response) = self
                        .try_fallback_providers_image(&fallback_providers, &provider_id, request)
                        .await
                    {
                        return Ok(response);
                    }
                }

                // All fallbacks exhausted: surface the original provider error.
                Err(ClientError::Provider(error))
            }
        }
    }
1387
1388 async fn gateway_image_generation(
1389 &self,
1390 request: ImageRequest,
1391 ) -> Result<ImageResponse, ClientError> {
1392 let url = format!("{}/v1/images/generations", self.base_url());
1393 self.gateway_request(url, request).await
1394 }
1395
    // Routes an audio-transcription request to a router-selected provider and
    // executes it with retry, metrics recording, and cross-provider fallback
    // on transient failures.
    async fn standalone_audio_transcription(
        &self,
        request: AudioRequest,
    ) -> Result<AudioResponse, ClientError> {
        // Route to appropriate provider
        let router = self.router.read().await;
        let routing_context = RoutingContext {
            model: Some(request.model.clone()),
            user_region: None,
            request_size: serde_json::to_string(&request).unwrap_or_default().len() as u32,
            estimated_tokens: 0, // Audio transcription doesn't have token estimation
            user_id: None,
            metadata: HashMap::new(),
        };

        let provider_names: Vec<String> = self.providers.keys().cloned().collect();
        let provider_selection = router
            .select_provider(&provider_names, &routing_context)
            .ok_or_else(|| ClientError::Configuration {
                message: "No suitable provider found".to_string(),
            })?;

        // Track the last used provider for metrics
        {
            let mut last_provider = self.last_used_provider.write().await;
            *last_provider = Some(provider_selection.provider_id.clone());
        }

        let provider_id = provider_selection.provider_id;
        let provider =
            self.providers
                .get(&provider_id)
                .ok_or_else(|| ClientError::Configuration {
                    message: format!("Provider {provider_id} not found"),
                })?;

        // Execute with retry and fallback
        let result = self
            .execute_with_enhanced_retry(
                || provider.audio_transcription(request.clone()),
                &provider_id,
            )
            .await;

        match result {
            Ok(response) => {
                // Update metrics. NOTE(review): latency/tokens/cost are
                // recorded as 0 for transcription — confirm intended.
                self.update_enhanced_metrics(&provider_id, true, 0, 0, 0.0)
                    .await;
                Ok(response)
            }
            Err(error) => {
                // Update metrics
                self.update_enhanced_metrics(&provider_id, false, 0, 0, 0.0)
                    .await;

                // Try fallback providers (every provider except the failed one)
                if self.should_fallback(&error) {
                    let fallback_providers: Vec<String> = self
                        .providers
                        .keys()
                        .filter(|&id| id != &provider_id)
                        .cloned()
                        .collect();

                    if let Ok(response) = self
                        .try_fallback_providers_audio(&fallback_providers, &provider_id, request)
                        .await
                    {
                        return Ok(response);
                    }
                }

                // All fallbacks exhausted: surface the original provider error.
                Err(ClientError::Provider(error))
            }
        }
    }
1473
1474 async fn gateway_audio_transcription(
1475 &self,
1476 request: AudioRequest,
1477 ) -> Result<AudioResponse, ClientError> {
1478 let url = format!("{}/v1/audio/transcriptions", self.base_url());
1479 self.gateway_request(url, request).await
1480 }
1481
    // Routes a text-to-speech request to a router-selected provider and
    // executes it with retry, metrics recording, and cross-provider fallback
    // on transient failures.
    async fn standalone_text_to_speech(
        &self,
        request: SpeechRequest,
    ) -> Result<SpeechResponse, ClientError> {
        // Route to appropriate provider
        let router = self.router.read().await;
        let routing_context = RoutingContext {
            model: Some(request.model.clone()),
            user_region: None,
            request_size: serde_json::to_string(&request).unwrap_or_default().len() as u32,
            estimated_tokens: 0, // Text-to-speech doesn't have token estimation
            user_id: None,
            metadata: HashMap::new(),
        };

        let provider_names: Vec<String> = self.providers.keys().cloned().collect();
        let provider_selection = router
            .select_provider(&provider_names, &routing_context)
            .ok_or_else(|| ClientError::Configuration {
                message: "No suitable provider found".to_string(),
            })?;

        // Track the last used provider for metrics
        {
            let mut last_provider = self.last_used_provider.write().await;
            *last_provider = Some(provider_selection.provider_id.clone());
        }

        let provider_id = provider_selection.provider_id;
        let provider =
            self.providers
                .get(&provider_id)
                .ok_or_else(|| ClientError::Configuration {
                    message: format!("Provider {provider_id} not found"),
                })?;

        // Execute with retry and fallback
        let result = self
            .execute_with_enhanced_retry(|| provider.text_to_speech(request.clone()), &provider_id)
            .await;

        match result {
            Ok(response) => {
                // Update metrics. NOTE(review): latency/tokens/cost are
                // recorded as 0 for text-to-speech — confirm intended.
                self.update_enhanced_metrics(&provider_id, true, 0, 0, 0.0)
                    .await;
                Ok(response)
            }
            Err(error) => {
                // Update metrics
                self.update_enhanced_metrics(&provider_id, false, 0, 0, 0.0)
                    .await;

                // Try fallback providers (every provider except the failed one)
                if self.should_fallback(&error) {
                    let fallback_providers: Vec<String> = self
                        .providers
                        .keys()
                        .filter(|&id| id != &provider_id)
                        .cloned()
                        .collect();

                    if let Ok(response) = self
                        .try_fallback_providers_speech(&fallback_providers, &provider_id, request)
                        .await
                    {
                        return Ok(response);
                    }
                }

                // All fallbacks exhausted: surface the original provider error.
                Err(ClientError::Provider(error))
            }
        }
    }
1556
1557 async fn gateway_text_to_speech(
1558 &self,
1559 request: SpeechRequest,
1560 ) -> Result<SpeechResponse, ClientError> {
1561 let url = format!("{}/v1/audio/speech", self.base_url());
1562 self.gateway_request(url, request).await
1563 }
1564
1565 // Helper methods
1566 fn base_url(&self) -> &str {
1567 match &self.mode {
1568 ClientMode::Gateway { base_url } => base_url,
1569 _ => unreachable!(),
1570 }
1571 }
1572
1573 async fn gateway_request<T, R>(&self, url: String, request: T) -> Result<R, ClientError>
1574 where
1575 T: serde::Serialize,
1576 R: serde::de::DeserializeOwned,
1577 {
1578 let response = self
1579 .http_client
1580 .post(&url)
1581 .header(
1582 "Authorization",
1583 format!(
1584 "Bearer {}",
1585 self.api_key.as_ref().unwrap_or(&"".to_string())
1586 ),
1587 )
1588 .json(&request)
1589 .send()
1590 .await
1591 .map_err(|e| ClientError::NetworkError {
1592 message: e.to_string(),
1593 })?;
1594
1595 if !response.status().is_success() {
1596 return Err(ClientError::Provider(
1597 crate::error::ProviderError::ServiceUnavailable,
1598 ));
1599 }
1600
1601 let result = response
1602 .json::<R>()
1603 .await
1604 .map_err(|e| ClientError::Serialization {
1605 message: e.to_string(),
1606 })?;
1607
1608 Ok(result)
1609 }
1610
1611 fn should_fallback(&self, error: &crate::error::ProviderError) -> bool {
1612 matches!(
1613 error,
1614 crate::error::ProviderError::RateLimit
1615 | crate::error::ProviderError::ServiceUnavailable
1616 | crate::error::ProviderError::Timeout
1617 )
1618 }
1619
1620 #[allow(dead_code)]
1621 async fn try_fallback_providers(
1622 &self,
1623 provider_ids: &[String],
1624 failed_provider: &str,
1625 request: ChatRequest,
1626 ) -> Result<ChatResponse, ClientError> {
1627 for provider_id in provider_ids {
1628 if provider_id != failed_provider {
1629 if let Some(provider) = self.providers.get(provider_id) {
1630 match provider.chat_completion(request.clone()).await {
1631 Ok(response) => return Ok(response),
1632 Err(_) => continue,
1633 }
1634 }
1635 }
1636 }
1637 Err(ClientError::Provider(
1638 crate::error::ProviderError::ServiceUnavailable,
1639 ))
1640 }
1641
1642 // Helper methods for fallback providers
1643 async fn try_fallback_providers_image(
1644 &self,
1645 provider_ids: &[String],
1646 _failed_provider: &str,
1647 request: ImageRequest,
1648 ) -> Result<ImageResponse, ClientError> {
1649 for provider_id in provider_ids {
1650 if let Some(provider) = self.providers.get(provider_id) {
1651 if let Ok(response) = provider.image_generation(request.clone()).await {
1652 // Update last used provider
1653 {
1654 let mut last_provider = self.last_used_provider.write().await;
1655 *last_provider = Some(provider_id.clone());
1656 }
1657 return Ok(response);
1658 }
1659 }
1660 }
1661
1662 Err(ClientError::Configuration {
1663 message: "All providers failed for image generation, including fallbacks".to_string(),
1664 })
1665 }
1666
1667 async fn try_fallback_providers_audio(
1668 &self,
1669 provider_ids: &[String],
1670 _failed_provider: &str,
1671 request: AudioRequest,
1672 ) -> Result<AudioResponse, ClientError> {
1673 for provider_id in provider_ids {
1674 if let Some(provider) = self.providers.get(provider_id) {
1675 if let Ok(response) = provider.audio_transcription(request.clone()).await {
1676 // Update last used provider
1677 {
1678 let mut last_provider = self.last_used_provider.write().await;
1679 *last_provider = Some(provider_id.clone());
1680 }
1681 return Ok(response);
1682 }
1683 }
1684 }
1685
1686 Err(ClientError::Configuration {
1687 message: "All providers failed for audio transcription, including fallbacks"
1688 .to_string(),
1689 })
1690 }
1691
1692 async fn try_fallback_providers_speech(
1693 &self,
1694 provider_ids: &[String],
1695 _failed_provider: &str,
1696 request: SpeechRequest,
1697 ) -> Result<SpeechResponse, ClientError> {
1698 for provider_id in provider_ids {
1699 if let Some(provider) = self.providers.get(provider_id) {
1700 if let Ok(response) = provider.text_to_speech(request.clone()).await {
1701 // Update last used provider
1702 {
1703 let mut last_provider = self.last_used_provider.write().await;
1704 *last_provider = Some(provider_id.clone());
1705 }
1706 return Ok(response);
1707 }
1708 }
1709 }
1710
1711 Err(ClientError::Configuration {
1712 message: "All providers failed for text-to-speech, including fallbacks".to_string(),
1713 })
1714 }
1715
1716 async fn try_fallback_providers_embedding(
1717 &self,
1718 provider_ids: &[String],
1719 failed_provider: &str,
1720 request: EmbeddingRequest,
1721 ) -> Result<EmbeddingResponse, ClientError> {
1722 for provider_id in provider_ids {
1723 if provider_id != failed_provider {
1724 if let Some(provider) = self.providers.get(provider_id) {
1725 if let Ok(response) = provider.embedding(request.clone()).await {
1726 // Update last used provider
1727 {
1728 let mut last_provider = self.last_used_provider.write().await;
1729 *last_provider = Some(provider_id.clone());
1730 }
1731 return Ok(response);
1732 }
1733 }
1734 }
1735 }
1736
1737 Err(ClientError::Configuration {
1738 message: "All providers failed for embedding, including fallbacks".to_string(),
1739 })
1740 }
1741}
1742
1743/// Builder for creating `UltrafastClient` instances with custom configuration.
1744///
1745/// The `UltrafastClientBuilder` provides a fluent API for configuring and creating
1746/// `UltrafastClient` instances. It supports both standalone and gateway modes.
1747///
1748/// # Examples
1749///
1750/// ## Standalone Mode
1751///
1752/// ```rust
1753/// use ultrafast_models_sdk::{UltrafastClient, RetryPolicy};
1754/// use std::time::Duration;
1755///
1756/// let retry_policy = RetryPolicy {
1757/// max_retries: 5,
1758/// initial_delay: Duration::from_millis(100),
1759/// max_delay: Duration::from_secs(10),
1760/// backoff_multiplier: 2.0,
1761/// jitter_factor: 0.1,
1762/// };
1763///
1764/// let client = UltrafastClientBuilder::default()
1765/// .with_retry_policy(retry_policy)
1766/// .standalone()
1767/// .with_openai("your-openai-key")
1768/// .with_anthropic("your-anthropic-key")
1769/// .build()?;
1770/// ```
1771///
1772/// ## Gateway Mode
1773///
1774/// ```rust
1775/// let client = UltrafastClientBuilder::default()
1776/// .gateway("http://localhost:3000".to_string())
1777/// .with_api_key("your-gateway-key")
1778/// .with_timeout(Duration::from_secs(60))
1779/// .build()?;
1780/// ```
1781///
1782/// # Builder Pattern
1783///
1784/// The builder follows the fluent builder pattern, allowing method chaining:
1785///
1786/// ```rust
1787/// let client = UltrafastClientBuilder::default()
1788/// .with_retry_policy(custom_retry_policy)
1789/// .standalone()
1790/// .with_openai("key1")
1791/// .with_anthropic("key2")
1792/// .with_routing_strategy(RoutingStrategy::LoadBalance {
1793/// weights: vec![0.6, 0.4],
1794/// })
1795/// .with_cache_config(cache_config)
1796/// .build()?;
1797/// ```
1798///
1799/// # Configuration Options
1800///
1801/// ## Retry Policy
1802///
1803/// Configure retry behavior for failed requests:
1804///
1805/// - **Max Retries**: Maximum number of retry attempts
1806/// - **Initial Delay**: Starting delay before first retry
1807/// - **Max Delay**: Maximum delay between retries
1808/// - **Backoff Multiplier**: Exponential backoff factor
1809/// - **Jitter Factor**: Randomization to prevent thundering herd
1810///
1811/// ## Provider Configuration
1812///
1813/// Add and configure AI providers:
1814///
1815/// - **OpenAI**: GPT models with API key
1816/// - **Anthropic**: Claude models with API key
1817/// - **Azure OpenAI**: Azure-hosted OpenAI models
1818/// - **Google Vertex AI**: Google AI models
1819/// - **Cohere**: Command models
1820/// - **Groq**: Fast inference models
1821/// - **Ollama**: Local models
1822/// - **Custom Providers**: Extensible provider system
1823///
1824/// ## Routing Strategy
1825///
1826/// Choose how requests are routed to providers:
1827///
1828/// - **Single**: Route all requests to one provider
1829/// - **Load Balance**: Distribute requests across providers
1830/// - **Failover**: Primary provider with automatic fallback
1831/// - **Conditional**: Route based on request characteristics
1832/// - **A/B Testing**: Route for testing different providers
1833///
1834/// ## Caching
1835///
1836/// Configure response caching:
1837///
1838/// - **Backend**: In-memory or Redis cache
1839/// - **TTL**: Time-to-live for cached responses
1840/// - **Max Size**: Maximum number of cached items
1841/// - **Key Strategy**: Custom cache key generation
1842///
1843/// # Thread Safety
1844///
1845/// The builder is not thread-safe and should not be shared across threads.
1846/// Build the client first, then share the client instance.
1847///
1848/// # Performance Considerations
1849///
1850/// - **Connection Pooling**: Configure appropriate pool sizes
1851/// - **Timeout Settings**: Set realistic timeouts for your use case
1852/// - **Retry Policies**: Balance retry attempts with user experience
1853/// - **Cache Configuration**: Enable caching for repeated requests
1854///
1855/// # Error Handling
1856///
1857/// The builder validates configuration and returns errors for:
1858///
1859/// - Invalid provider configurations
1860/// - Missing required fields
1861/// - Configuration conflicts
1862/// - Network connectivity issues
1863///
1864/// # See Also
1865///
1866/// - [`UltrafastClient`] - The main client struct
1867/// - [`StandaloneClientBuilder`] - For standalone mode configuration
1868/// - [`GatewayClientBuilder`] - For gateway mode configuration
1869/// - [`RetryPolicy`] - For retry configuration
1870/// - [`CacheConfig`] - For cache configuration
#[derive(Default)]
pub struct UltrafastClientBuilder {
    // Retry policy handed down to whichever mode-specific builder
    // (`standalone()` / `gateway()`) is selected next.
    retry_policy: RetryPolicy,
}
1875
1876impl UltrafastClientBuilder {
1877 pub fn with_retry_policy(mut self, retry_policy: RetryPolicy) -> Self {
1878 self.retry_policy = retry_policy;
1879 self
1880 }
1881
1882 pub fn standalone(self) -> StandaloneClientBuilder {
1883 StandaloneClientBuilder {
1884 providers: HashMap::new(),
1885 routing_strategy: RoutingStrategy::Single,
1886 cache_config: None,
1887 retry_policy: self.retry_policy,
1888 }
1889 }
1890
1891 pub fn gateway(self, base_url: String) -> GatewayClientBuilder {
1892 GatewayClientBuilder {
1893 base_url,
1894 api_key: None,
1895 timeout: Duration::from_secs(30),
1896 retry_policy: self.retry_policy,
1897 }
1898 }
1899}
1900
1901/// Builder for creating standalone mode `UltrafastClient` instances.
1902///
1903/// The `StandaloneClientBuilder` is used to configure clients that communicate
1904/// directly with AI providers without going through a gateway.
1905///
1906/// # Features
1907///
1908/// - **Direct Provider Communication**: Bypass gateway for lower latency
1909/// - **Provider Management**: Add and configure multiple AI providers
1910/// - **Routing Strategies**: Choose how requests are distributed
1911/// - **Caching**: Configure response caching for performance
1912/// - **Retry Policies**: Customize retry behavior
1913///
1914/// # Examples
1915///
1916/// ## Basic Setup
1917///
1918/// ```rust
1919/// let client = StandaloneClientBuilder::default()
1920/// .with_openai("your-openai-key")
1921/// .build()?;
1922/// ```
1923///
1924/// ## Multi-Provider Setup
1925///
1926/// ```rust
1927/// let client = StandaloneClientBuilder::default()
1928/// .with_openai("openai-key")
1929/// .with_anthropic("anthropic-key")
1930/// .with_google_vertex_ai("google-key", "project-id")
1931/// .with_ollama("http://localhost:11434")
1932/// .with_routing_strategy(RoutingStrategy::LoadBalance {
1933/// weights: vec![0.4, 0.3, 0.2, 0.1],
1934/// })
1935/// .build()?;
1936/// ```
1937///
1938/// ## Advanced Configuration
1939///
1940/// ```rust
1941/// use ultrafast_models_sdk::{CacheConfig, RoutingStrategy};
1942/// use std::time::Duration;
1943///
1944/// let cache_config = CacheConfig {
1945/// enabled: true,
/// ttl: Duration::from_secs(60 * 60), // 1 hour
1947/// max_size: 1000,
1948/// backend: CacheBackend::Memory,
1949/// };
1950///
1951/// let client = StandaloneClientBuilder::default()
1952/// .with_openai("your-key")
1953/// .with_routing_strategy(RoutingStrategy::Failover)
1954/// .with_cache_config(cache_config)
1955/// .build()?;
1956/// ```
1957///
1958/// # Provider Methods
1959///
1960/// ## OpenAI
1961///
1962/// ```rust
1963/// .with_openai("your-openai-api-key")
1964/// ```
1965///
1966/// ## Anthropic
1967///
1968/// ```rust
1969/// .with_anthropic("your-anthropic-api-key")
1970/// ```
1971///
1972/// ## Azure OpenAI
1973///
1974/// ```rust
1975/// .with_azure_openai("your-azure-key", "deployment-name")
1976/// ```
1977///
1978/// ## Google Vertex AI
1979///
1980/// ```rust
1981/// .with_google_vertex_ai("your-google-key", "project-id")
1982/// ```
1983///
1984/// ## Cohere
1985///
1986/// ```rust
1987/// .with_cohere("your-cohere-api-key")
1988/// ```
1989///
1990/// ## Groq
1991///
1992/// ```rust
1993/// .with_groq("your-groq-api-key")
1994/// ```
1995///
1996/// ## Ollama
1997///
1998/// ```rust
1999/// .with_ollama("http://localhost:11434")
2000/// ```
2001///
2002/// ## Custom Providers
2003///
2004/// ```rust
2005/// let custom_config = ProviderConfig::new("custom", "api-key");
2006/// .with_provider("custom", custom_config)
2007/// ```
2008///
2009/// # Routing Strategies
2010///
2011/// ## Single Provider
2012///
2013/// ```rust
2014/// .with_routing_strategy(RoutingStrategy::Single)
2015/// ```
2016///
2017/// ## Load Balancing
2018///
2019/// ```rust
2020/// .with_routing_strategy(RoutingStrategy::LoadBalance {
2021/// weights: vec![0.6, 0.4], // 60% OpenAI, 40% Anthropic
2022/// })
2023/// ```
2024///
2025/// ## Failover
2026///
2027/// ```rust
2028/// .with_routing_strategy(RoutingStrategy::Failover)
2029/// ```
2030///
2031/// ## Conditional Routing
2032///
2033/// ```rust
2034/// .with_routing_strategy(RoutingStrategy::Conditional {
2035/// conditions: vec![
2036/// ("model", "gpt-4", "openai"),
2037/// ("model", "claude-3", "anthropic"),
2038/// ],
2039/// default: "openai".to_string(),
2040/// })
2041/// ```
2042///
2043/// ## A/B Testing
2044///
2045/// ```rust
2046/// .with_routing_strategy(RoutingStrategy::ABTesting {
2047/// split: 0.5, // 50% to each provider
2048/// })
2049/// ```
2050///
2051/// # Caching Configuration
2052///
2053/// ```rust
2054/// let cache_config = CacheConfig {
2055/// enabled: true,
/// ttl: Duration::from_secs(60 * 60), // 1 hour
2057/// max_size: 1000,
2058/// backend: CacheBackend::Memory,
2059/// };
2060///
2061/// .with_cache_config(cache_config)
2062/// ```
2063///
2064/// # Performance Optimization
2065///
2066/// - **Provider Selection**: Choose providers based on your needs
2067/// - **Routing Strategy**: Optimize for latency, cost, or reliability
2068/// - **Caching**: Enable caching for repeated requests
2069/// - **Connection Pooling**: Configure appropriate pool sizes
2070///
2071/// # Error Handling
2072///
2073/// The builder validates configuration and returns errors for:
2074///
2075/// - Missing provider configurations
2076/// - Invalid routing strategies
2077/// - Configuration conflicts
2078/// - Network connectivity issues
2079///
2080/// # Thread Safety
2081///
2082/// The builder is not thread-safe. Build the client first, then share the client instance.
2083///
2084/// # See Also
2085///
2086/// - [`UltrafastClient`] - The main client struct
2087/// - [`UltrafastClientBuilder`] - The main builder
2088/// - [`GatewayClientBuilder`] - For gateway mode
2089/// - [`ProviderConfig`] - For provider configuration
2090/// - [`CacheConfig`] - For cache configuration
pub struct StandaloneClientBuilder {
    // Provider configurations keyed by provider id (e.g. "openai").
    providers: HashMap<String, ProviderConfig>,
    // How requests are distributed across the configured providers.
    routing_strategy: RoutingStrategy,
    // Optional response-cache configuration; caching disabled when `None`.
    cache_config: Option<CacheConfig>,
    // Retry policy applied to all provider calls made by the built client.
    retry_policy: RetryPolicy,
}
2097
2098impl Default for StandaloneClientBuilder {
2099 fn default() -> Self {
2100 Self {
2101 providers: HashMap::new(),
2102 routing_strategy: RoutingStrategy::Single,
2103 cache_config: None,
2104 retry_policy: RetryPolicy::default(),
2105 }
2106 }
2107}
2108
2109impl StandaloneClientBuilder {
2110 pub fn with_provider(mut self, name: impl Into<String>, config: ProviderConfig) -> Self {
2111 self.providers.insert(name.into(), config);
2112 self
2113 }
2114
2115 pub fn with_openai(self, api_key: impl Into<String>) -> Self {
2116 let config = ProviderConfig::new("openai", api_key);
2117 self.with_provider("openai", config)
2118 }
2119
2120 pub fn with_anthropic(self, api_key: impl Into<String>) -> Self {
2121 let config = ProviderConfig::new("anthropic", api_key);
2122 self.with_provider("anthropic", config)
2123 }
2124
2125 pub fn with_azure_openai(
2126 self,
2127 api_key: impl Into<String>,
2128 deployment_name: impl Into<String>,
2129 ) -> Self {
2130 let mut config = ProviderConfig::new("azure-openai", api_key);
2131 config.name = deployment_name.into();
2132 self.with_provider("azure-openai", config)
2133 }
2134
2135 pub fn with_google_vertex_ai(
2136 self,
2137 api_key: impl Into<String>,
2138 project_id: impl Into<String>,
2139 ) -> Self {
2140 let mut config = ProviderConfig::new("google-vertex-ai", api_key);
2141 config
2142 .headers
2143 .insert("project-id".to_string(), project_id.into());
2144 self.with_provider("google-vertex-ai", config)
2145 }
2146
2147 pub fn with_cohere(self, api_key: impl Into<String>) -> Self {
2148 let config = ProviderConfig::new("cohere", api_key);
2149 self.with_provider("cohere", config)
2150 }
2151
2152 pub fn with_groq(self, api_key: impl Into<String>) -> Self {
2153 let config = ProviderConfig::new("groq", api_key);
2154 self.with_provider("groq", config)
2155 }
2156
2157 pub fn with_mistral(self, api_key: impl Into<String>) -> Self {
2158 let config = ProviderConfig::new("mistral", api_key);
2159 self.with_provider("mistral", config)
2160 }
2161
2162 pub fn with_perplexity(self, api_key: impl Into<String>) -> Self {
2163 let config = ProviderConfig::new("perplexity", api_key);
2164 self.with_provider("perplexity", config)
2165 }
2166
2167 pub fn with_openrouter(self, api_key: impl Into<String>) -> Self {
2168 let config = ProviderConfig::new("openrouter", api_key);
2169 self.with_provider("openrouter", config)
2170 }
2171
2172 pub fn with_ollama(self, base_url: impl Into<String>) -> Self {
2173 let mut config = ProviderConfig::new("ollama", "");
2174 config.base_url = Some(base_url.into());
2175 self.with_provider("ollama", config)
2176 }
2177
2178 pub fn with_custom(
2179 self,
2180 name: impl Into<String>,
2181 api_key: impl Into<String>,
2182 base_url: impl Into<String>,
2183 ) -> Self {
2184 let mut config = ProviderConfig::new("custom", api_key);
2185 config.name = name.into();
2186 config.base_url = Some(base_url.into());
2187 self.with_provider("custom", config)
2188 }
2189
2190 pub fn with_routing_strategy(mut self, strategy: RoutingStrategy) -> Self {
2191 self.routing_strategy = strategy;
2192 self
2193 }
2194
2195 pub fn with_cache(mut self, config: CacheConfig) -> Self {
2196 self.cache_config = Some(config);
2197 self
2198 }
2199
2200 pub fn build(self) -> Result<UltrafastClient, ClientError> {
2201 if self.providers.is_empty() {
2202 return Err(ClientError::Configuration {
2203 message: "At least one provider must be configured".to_string(),
2204 });
2205 }
2206
2207 let mut providers = HashMap::new();
2208 for (name, config) in self.providers {
2209 // Use circuit breaker by default for all providers
2210 let provider = create_provider_with_circuit_breaker(config, None)?;
2211 providers.insert(name, provider.into());
2212 }
2213
2214 let cache = self.cache_config.map(|config| {
2215 let cache: Arc<dyn Cache> = Arc::new(InMemoryCache::new(config.max_size));
2216 cache
2217 });
2218
2219 // Create optimized HTTP client for standalone mode too
2220 let http_client = Client::builder()
2221 .pool_max_idle_per_host(20)
2222 .pool_idle_timeout(Duration::from_secs(60))
2223 .build()
2224 .map_err(|e| ClientError::Configuration {
2225 message: format!("Failed to create HTTP client: {e}"),
2226 })?;
2227
2228 Ok(UltrafastClient {
2229 mode: ClientMode::Standalone,
2230 providers,
2231 router: Arc::new(RwLock::new(Router::new(self.routing_strategy))),
2232 cache,
2233 metrics: Arc::new(RwLock::new(HashMap::new())),
2234 http_client,
2235 api_key: None,
2236 timeout: Duration::from_secs(30),
2237 retry_policy: self.retry_policy,
2238 connection_pool: Arc::new(RwLock::new(ConnectionPool::new(
2239 20,
2240 Duration::from_secs(60),
2241 Duration::from_secs(60),
2242 ))),
2243 last_used_provider: Arc::new(RwLock::new(None)),
2244 })
2245 }
2246}
2247
/// Builder for a client operating in gateway mode, where all requests are
/// forwarded to an Ultrafast Gateway instance instead of directly to
/// providers.
pub struct GatewayClientBuilder {
    /// Base URL of the gateway (e.g. "http://localhost:3000").
    base_url: String,
    /// Optional API key sent to the gateway; `None` when unauthenticated.
    api_key: Option<String>,
    /// Request timeout for the HTTP client; defaults to 30 seconds.
    timeout: Duration,
    /// Retry policy passed through to the built client.
    retry_policy: RetryPolicy,
}
2254
2255impl GatewayClientBuilder {
2256 pub fn new(base_url: String) -> Self {
2257 Self {
2258 base_url,
2259 api_key: None,
2260 timeout: Duration::from_secs(30),
2261 retry_policy: RetryPolicy::default(),
2262 }
2263 }
2264
2265 pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
2266 self.api_key = Some(api_key.into());
2267 self
2268 }
2269
2270 pub fn with_timeout(mut self, timeout: Duration) -> Self {
2271 self.timeout = timeout;
2272 self
2273 }
2274
2275 pub fn build(self) -> Result<UltrafastClient, ClientError> {
2276 let http_client = Client::builder()
2277 .timeout(self.timeout)
2278 // Phase 1 Optimizations: Connection pooling, keep-alive
2279 .pool_max_idle_per_host(20) // Increased connection pool
2280 .pool_idle_timeout(Duration::from_secs(60)) // Keep connections alive longer
2281 .build()
2282 .map_err(|e| ClientError::Configuration {
2283 message: format!("Failed to create HTTP client: {e}"),
2284 })?;
2285
2286 Ok(UltrafastClient {
2287 mode: ClientMode::Gateway {
2288 base_url: self.base_url,
2289 },
2290 providers: HashMap::new(),
2291 router: Arc::new(RwLock::new(Router::new(RoutingStrategy::Single))),
2292 cache: None,
2293 metrics: Arc::new(RwLock::new(HashMap::new())),
2294 http_client,
2295 api_key: self.api_key,
2296 timeout: self.timeout,
2297 retry_policy: self.retry_policy,
2298 connection_pool: Arc::new(RwLock::new(ConnectionPool::new(
2299 20,
2300 Duration::from_secs(60),
2301 Duration::from_secs(60),
2302 ))),
2303 last_used_provider: Arc::new(RwLock::new(None)),
2304 })
2305 }
2306}
2307
#[cfg(test)]
mod tests {
    use super::*;

    /// A standalone client with a single configured provider should build.
    #[test]
    fn test_client_creation_with_circuit_breaker() {
        let built = UltrafastClient::standalone()
            .with_openai("test-key")
            .build();
        assert!(built.is_ok());
    }

    /// Circuit-breaker metrics and health status should be populated for
    /// every configured provider, and each provider should start healthy.
    #[tokio::test]
    async fn test_circuit_breaker_integration() {
        let client = UltrafastClient::standalone()
            .with_openai("test-key")
            .build()
            .expect("client should build with one provider");

        let cb_metrics = client.get_circuit_breaker_metrics().await;
        assert!(!cb_metrics.is_empty());

        let health_status = client.get_provider_health_status().await;
        assert!(!health_status.is_empty());

        for (_, is_healthy) in health_status {
            assert!(is_healthy);
        }
    }
}