ai_lib/client.rs

1use crate::api::{ChatApi, ChatCompletionChunk};
2use crate::config::{ConnectionOptions, ResilienceConfig};
3use crate::metrics::{Metrics, NoopMetrics};
4use crate::provider::{
5    classification::ProviderClassification, AI21Adapter, CohereAdapter, GeminiAdapter, GenericAdapter,
6    MistralAdapter, OpenAiAdapter, PerplexityAdapter, ProviderConfigs,
7};
8use crate::types::{AiLibError, ChatCompletionRequest, ChatCompletionResponse};
9use futures::stream::Stream;
10use futures::Future;
11use std::sync::Arc;
12use tokio::sync::oneshot;
13use crate::rate_limiter::{BackpressureController, BackpressurePermit};
14
15// Note: Some variables are marked as `mut` because they are modified within conditional compilation
16// blocks (`#[cfg(feature = "routing_mvp")]`). The linter may not detect these modifications.
17
18/// Model configuration options for explicit model selection
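///
/// Example (illustrative sketch; assumes `ModelOptions` is re-exported at the crate root and
/// uses placeholder model names):
/// ```rust
/// use ai_lib::ModelOptions;
///
/// let opts = ModelOptions::new()
///     .with_chat_model("my-chat-model")            // placeholder model id
///     .with_fallback_models(vec!["my-fallback"])   // placeholder model id
///     .with_auto_discovery(false);
/// assert_eq!(opts.chat_model.as_deref(), Some("my-chat-model"));
/// ```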
19#[derive(Debug, Clone)]
20pub struct ModelOptions {
21    pub chat_model: Option<String>,
22    pub multimodal_model: Option<String>,
23    pub fallback_models: Vec<String>,
24    pub auto_discovery: bool,
25}
26
27impl Default for ModelOptions {
28    fn default() -> Self {
29        Self {
30            chat_model: None,
31            multimodal_model: None,
32            fallback_models: Vec::new(),
33            auto_discovery: true,
34        }
35    }
36}
37
38impl ModelOptions {
39    /// Create default model options
40    pub fn new() -> Self {
41        Self::default()
42    }
43
44    /// Set chat model
45    pub fn with_chat_model(mut self, model: &str) -> Self {
46        self.chat_model = Some(model.to_string());
47        self
48    }
49
50    /// Set multimodal model
51    pub fn with_multimodal_model(mut self, model: &str) -> Self {
52        self.multimodal_model = Some(model.to_string());
53        self
54    }
55
56    /// Set fallback models
57    pub fn with_fallback_models(mut self, models: Vec<&str>) -> Self {
58        self.fallback_models = models.into_iter().map(|s| s.to_string()).collect();
59        self
60    }
61
62    /// Enable or disable auto discovery
63    pub fn with_auto_discovery(mut self, enabled: bool) -> Self {
64        self.auto_discovery = enabled;
65        self
66    }
67}
68
69/// Helper function to create GenericAdapter with optional custom transport
70fn create_generic_adapter(
71    config: crate::provider::config::ProviderConfig,
72    transport: Option<crate::transport::DynHttpTransportRef>,
73) -> Result<Box<dyn ChatApi>, AiLibError> {
74    if let Some(custom_transport) = transport {
75        Ok(Box::new(GenericAdapter::with_transport_ref(
76            config,
77            custom_transport,
78        )?))
79    } else {
80        Ok(Box::new(GenericAdapter::new(config)?))
81    }
82}
83
/// AI model provider enumeration used by the unified AI client.
87#[derive(Debug, Clone, Copy, PartialEq)]
88pub enum Provider {
89    // Config-driven providers
90    Groq,
91    XaiGrok,
92    Ollama,
93    DeepSeek,
94    Anthropic,
95    AzureOpenAI,
96    HuggingFace,
97    TogetherAI,
98    OpenRouter,
99    Replicate,
100    // Chinese providers (OpenAI-compatible or config-driven)
101    BaiduWenxin,
102    TencentHunyuan,
103    IflytekSpark,
104    Moonshot,
105    ZhipuAI,
106    MiniMax,
107    // Independent adapters
108    OpenAI,
109    Qwen,
110    Gemini,
111    Mistral,
112    Cohere,
113    Perplexity,
114    AI21,
115    // Bedrock removed (deferred)
116}
117
118impl Provider {
119    /// Get the provider's preferred default chat model.
120    /// These should mirror the values used inside `ProviderConfigs`.
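    ///
    /// Example (values taken directly from the mapping below):
    /// ```rust
    /// use ai_lib::Provider;
    ///
    /// assert_eq!(Provider::OpenAI.default_chat_model(), "gpt-3.5-turbo");
    /// assert_eq!(Provider::Groq.default_chat_model(), "llama-3.1-8b-instant");
    /// ```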
121    pub fn default_chat_model(&self) -> &'static str {
122        match self {
123            Provider::Groq => "llama-3.1-8b-instant",
124            Provider::XaiGrok => "grok-beta",
125            Provider::Ollama => "llama3-8b",
126            Provider::DeepSeek => "deepseek-chat",
127            Provider::Anthropic => "claude-3-5-sonnet-20241022",
128            Provider::AzureOpenAI => "gpt-35-turbo",
129            Provider::HuggingFace => "microsoft/DialoGPT-medium",
130            Provider::TogetherAI => "meta-llama/Llama-3-8b-chat-hf",
131            Provider::OpenRouter => "openai/gpt-3.5-turbo",
132            Provider::Replicate => "meta/llama-2-7b-chat",
133            Provider::BaiduWenxin => "ernie-3.5",
134            Provider::TencentHunyuan => "hunyuan-standard",
135            Provider::IflytekSpark => "spark-v3.0",
136            Provider::Moonshot => "moonshot-v1-8k",
137            Provider::ZhipuAI => "glm-4",
138            Provider::MiniMax => "abab6.5-chat",
139            Provider::OpenAI => "gpt-3.5-turbo",
140            Provider::Qwen => "qwen-turbo",
141            Provider::Gemini => "gemini-1.5-flash", // current v1beta-supported chat model
142            Provider::Mistral => "mistral-small",   // generic default
143            Provider::Cohere => "command-r",        // chat-capable model
144            Provider::Perplexity => "llama-3.1-sonar-small-128k-online",
145            Provider::AI21 => "j2-ultra",
146        }
147    }
148
149    /// Get the provider's preferred multimodal model (if any).
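    ///
    /// Example (values taken from the mapping below; providers without a documented
    /// multimodal endpoint return `None`):
    /// ```rust
    /// use ai_lib::Provider;
    ///
    /// assert_eq!(Provider::Gemini.default_multimodal_model(), Some("gemini-1.5-flash"));
    /// assert_eq!(Provider::Groq.default_multimodal_model(), None);
    /// ```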
150    pub fn default_multimodal_model(&self) -> Option<&'static str> {
151        match self {
152            Provider::OpenAI => Some("gpt-4o"),
153            Provider::AzureOpenAI => Some("gpt-4o"),
154            Provider::Anthropic => Some("claude-3-5-sonnet-20241022"),
155            Provider::Groq => None, // No multimodal model currently available
156            Provider::Gemini => Some("gemini-1.5-flash"),
157            Provider::Cohere => Some("command-r-plus"),
158            Provider::OpenRouter => Some("openai/gpt-4o"),
159            Provider::Replicate => Some("meta/llama-2-7b-chat"),
160            Provider::ZhipuAI => Some("glm-4v"),
161            Provider::MiniMax => Some("abab6.5-chat"),
162            Provider::Perplexity => Some("llama-3.1-sonar-small-128k-online"),
163            Provider::AI21 => Some("j2-ultra"),
164            // Others presently have no clearly documented multimodal endpoint or are not yet wired.
165            _ => None,
166        }
167    }
168}
169
170/// Unified AI client
171///
172/// Usage example:
173/// ```rust
174/// use ai_lib::{AiClient, Provider, ChatCompletionRequest, Message, Role};
175///
176/// #[tokio::main]
177/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
178///     // Switch model provider by changing Provider value
179///     let client = AiClient::new(Provider::Groq)?;
180///     
181///     let request = ChatCompletionRequest::new(
182///         "test-model".to_string(),
183///         vec![Message {
184///             role: Role::User,
185///             content: ai_lib::types::common::Content::Text("Hello".to_string()),
186///             function_call: None,
187///         }],
188///     );
189///     
190///     // Note: Set GROQ_API_KEY environment variable for actual API calls
191///     // Optional: Set AI_PROXY_URL environment variable to use proxy server
192///     // let response = client.chat_completion(request).await?;
193///     
194///     println!("Client created successfully with provider: {:?}", client.current_provider());
195///     println!("Request prepared for model: {}", request.model);
196///     
197///     Ok(())
198/// }
199/// ```
200///
201/// # Proxy Configuration
202///
203/// Configure proxy server by setting the `AI_PROXY_URL` environment variable:
204///
205/// ```bash
206/// export AI_PROXY_URL=http://proxy.example.com:8080
207/// ```
208///
209/// Supported proxy formats:
210/// - HTTP proxy: `http://proxy.example.com:8080`
211/// - HTTPS proxy: `https://proxy.example.com:8080`  
212/// - With authentication: `http://user:pass@proxy.example.com:8080`
213pub struct AiClient {
214    provider: Provider,
215    adapter: Box<dyn ChatApi>,
216    metrics: Arc<dyn Metrics>,
217    connection_options: Option<ConnectionOptions>,
218    #[cfg(feature = "interceptors")]
219    interceptor_pipeline: Option<crate::interceptors::InterceptorPipeline>,
220    // Custom default models (override provider defaults)
221    custom_default_chat_model: Option<String>,
222    custom_default_multimodal_model: Option<String>,
223    // Optional backpressure controller
224    backpressure: Option<Arc<BackpressureController>>,
225    // Optional basic failover providers (OSS): if a request fails with a retryable error,
226    // we will try providers in this list in order.
227    failover_providers: Option<Vec<Provider>>,
228    #[cfg(feature = "routing_mvp")]
229    routing_array: Option<crate::provider::models::ModelArray>,
230}
231
232impl AiClient {
233    /// Get the effective default chat model for this client (honors custom override)
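    ///
    /// Example (assumes no custom default model has been configured via the builder):
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// let client = AiClient::new(Provider::Groq)?;
    /// assert_eq!(client.default_chat_model(), "llama-3.1-8b-instant");
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```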
234    pub fn default_chat_model(&self) -> String {
235        self.custom_default_chat_model
236            .clone()
237            .unwrap_or_else(|| self.provider.default_chat_model().to_string())
238    }
239    /// Create a new AI client
240    ///
241    /// # Arguments
242    /// * `provider` - The AI model provider to use
243    ///
244    /// # Returns
245    /// * `Result<Self, AiLibError>` - Client instance on success, error on failure
246    ///
247    /// # Example
248    /// ```rust
249    /// use ai_lib::{AiClient, Provider};
250    ///
251    /// let client = AiClient::new(Provider::Groq)?;
252    /// # Ok::<(), ai_lib::AiLibError>(())
253    /// ```
254    pub fn new(provider: Provider) -> Result<Self, AiLibError> {
255        // Use the new builder to create client with automatic environment variable detection
256        let mut c = AiClientBuilder::new(provider).build()?;
257        c.connection_options = None;
258        Ok(c)
259    }
260
261    /// Configure a basic failover chain for automatic provider switching on failures.
262    ///
263    /// When a request fails with a retryable error (network, timeout, rate limit, 5xx),
264    /// the client will automatically attempt subsequent providers in the specified order.
265    ///
266    /// ## Execution Behavior
267    /// - **Error Types**: Only retryable errors trigger failover (network, timeout, rate limit, 5xx)
268    /// - **Order**: Providers are tried in the exact order specified in the vector
269    /// - **Skip Current**: The current provider is automatically skipped if it appears in the chain
270    /// - **State Preservation**: Routing selections and request modifications are preserved during failover
271    ///
272    /// ## Integration with Other Features
273    /// - **Routing**: When used with `with_routing_array()`, the selected model is preserved
274    ///   across failover attempts. Failover providers will use the same model selection.
275    /// - **Interceptors**: Failover happens after interceptor processing, so interceptors
276    ///   can modify requests before failover attempts.
277    /// - **Metrics**: Failover attempts are tracked with `failover.attempts`, `failover.success`,
278    ///   and `failover.error` metrics.
279    ///
280    /// ## Examples
281    /// ```rust
282    /// use ai_lib::{AiClient, Provider};
283    ///
284    /// // Basic failover configuration
285    /// let client = AiClient::new(Provider::OpenAI)?
286    ///     .with_failover(vec![Provider::Anthropic, Provider::Groq]);
287    ///
288    /// // Combined with routing (requires routing_mvp feature)
289    /// #[cfg(feature = "routing_mvp")]
290    /// {
291    ///     let mut array = ai_lib::provider::models::ModelArray::new("production");
292    ///     // ... configure array
293    ///     let client = client
294    ///         .with_routing_array(array)
295    ///         .with_failover(vec![Provider::Anthropic, Provider::Groq]);
296    /// }
297    ///
298    /// // Empty vector disables failover
299    /// let client = client.with_failover(vec![]);
300    /// # Ok::<(), ai_lib::AiLibError>(())
301    /// ```
302    ///
303    /// ## Limitations (OSS)
304    /// This is a lightweight OSS feature. For advanced capabilities, consider ai-lib-pro:
305    /// - Weighted failover based on provider performance
306    /// - SLO-aware failover policies
307    /// - Cost-based failover decisions
308    /// - Advanced health checking and circuit breaking
309    ///
310    /// # Arguments
311    /// * `providers` - Ordered list of fallback providers. Empty vector disables failover.
312    ///
313    /// # Returns
314    /// * `Self` - Client instance with failover configuration
315    pub fn with_failover(mut self, providers: Vec<Provider>) -> Self {
316        self.failover_providers = if providers.is_empty() { None } else { Some(providers) };
317        self
318    }
319
320    #[cfg(feature = "routing_mvp")]
321    /// Set a routing model array to enable basic endpoint selection before requests.
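    ///
    /// Minimal sketch mirroring the other routing examples in this file; the array must be
    /// populated with endpoints (see `ModelArray`) before routing is useful:
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// #[cfg(feature = "routing_mvp")]
    /// {
    ///     let mut array = ai_lib::provider::models::ModelArray::new("production");
    ///     // ... add endpoints to `array`
    ///     let client = AiClient::new(Provider::Groq)?.with_routing_array(array);
    /// }
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```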
322    pub fn with_routing_array(mut self, array: crate::provider::models::ModelArray) -> Self {
323        self.routing_array = Some(array);
324        self
325    }
326
327    // #[cfg(feature = "routing_mvp")]
328    // fn select_routed_model(&mut self, fallback: &str) -> String {
329    //     if let Some(arr) = self.routing_array.as_mut() {
330    //         if let Some(ep) = arr.select_endpoint() {
331    //             return ep.model_name.clone();
332    //         }
333    //     }
334    //     fallback.to_string()
335    // }
336
    /// Create a client with minimal explicit options (base_url/proxy/timeout). Not all providers
    /// support every override; fields a provider does not support are ignored gracefully.
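    ///
    /// Sketch (assumes `ConnectionOptions` is re-exported at the crate root, exposes public
    /// fields, and implements `Default`; adjust to its actual construction API if that differs):
    /// ```rust,ignore
    /// use ai_lib::{AiClient, ConnectionOptions, Provider};
    /// use std::time::Duration;
    ///
    /// let opts = ConnectionOptions {
    ///     base_url: Some("https://custom.groq.com".to_string()),
    ///     timeout: Some(Duration::from_secs(30)),
    ///     ..Default::default()
    /// };
    /// let client = AiClient::with_options(Provider::Groq, opts)?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```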
339    pub fn with_options(provider: Provider, opts: ConnectionOptions) -> Result<Self, AiLibError> {
340        let config_driven = provider.is_config_driven();
341        let need_builder = config_driven
342            && (opts.base_url.is_some()
343                || opts.proxy.is_some()
344                || opts.timeout.is_some()
345                || opts.disable_proxy);
346        if need_builder {
347            let mut b = AiClient::builder(provider);
348            if let Some(ref base) = opts.base_url {
349                b = b.with_base_url(base);
350            }
351            if opts.disable_proxy {
352                b = b.without_proxy();
353            } else if let Some(ref proxy) = opts.proxy {
354                if proxy.is_empty() {
355                    b = b.without_proxy();
356                } else {
357                    b = b.with_proxy(Some(proxy));
358                }
359            }
360            if let Some(t) = opts.timeout {
361                b = b.with_timeout(t);
362            }
363            let mut client = b.build()?;
364            // If api_key override + generic provider path: re-wrap adapter using override
365            if opts.api_key.is_some() {
366                // Only applies to config-driven generic adapter providers
367                let new_adapter: Option<Box<dyn ChatApi>> = match provider {
368                    Provider::Groq => Some(Box::new(GenericAdapter::new_with_api_key(
369                        ProviderConfigs::groq(),
370                        opts.api_key.clone(),
371                    )?)),
372                    Provider::XaiGrok => Some(Box::new(GenericAdapter::new_with_api_key(
373                        ProviderConfigs::xai_grok(),
374                        opts.api_key.clone(),
375                    )?)),
376                    Provider::Ollama => Some(Box::new(GenericAdapter::new_with_api_key(
377                        ProviderConfigs::ollama(),
378                        opts.api_key.clone(),
379                    )?)),
380                    Provider::DeepSeek => Some(Box::new(GenericAdapter::new_with_api_key(
381                        ProviderConfigs::deepseek(),
382                        opts.api_key.clone(),
383                    )?)),
384                    Provider::Qwen => Some(Box::new(GenericAdapter::new_with_api_key(
385                        ProviderConfigs::qwen(),
386                        opts.api_key.clone(),
387                    )?)),
388                    Provider::BaiduWenxin => Some(Box::new(GenericAdapter::new_with_api_key(
389                        ProviderConfigs::baidu_wenxin(),
390                        opts.api_key.clone(),
391                    )?)),
392                    Provider::TencentHunyuan => Some(Box::new(GenericAdapter::new_with_api_key(
393                        ProviderConfigs::tencent_hunyuan(),
394                        opts.api_key.clone(),
395                    )?)),
396                    Provider::IflytekSpark => Some(Box::new(GenericAdapter::new_with_api_key(
397                        ProviderConfigs::iflytek_spark(),
398                        opts.api_key.clone(),
399                    )?)),
400                    Provider::Moonshot => Some(Box::new(GenericAdapter::new_with_api_key(
401                        ProviderConfigs::moonshot(),
402                        opts.api_key.clone(),
403                    )?)),
404                    Provider::Anthropic => Some(Box::new(GenericAdapter::new_with_api_key(
405                        ProviderConfigs::anthropic(),
406                        opts.api_key.clone(),
407                    )?)),
408                    Provider::AzureOpenAI => Some(Box::new(GenericAdapter::new_with_api_key(
409                        ProviderConfigs::azure_openai(),
410                        opts.api_key.clone(),
411                    )?)),
412                    Provider::HuggingFace => Some(Box::new(GenericAdapter::new_with_api_key(
413                        ProviderConfigs::huggingface(),
414                        opts.api_key.clone(),
415                    )?)),
416                    Provider::TogetherAI => Some(Box::new(GenericAdapter::new_with_api_key(
417                        ProviderConfigs::together_ai(),
418                        opts.api_key.clone(),
419                    )?)),
420                    _ => None,
421                };
422                if let Some(a) = new_adapter {
423                    client.adapter = a;
424                }
425            }
426            client.connection_options = Some(opts);
427            return Ok(client);
428        }
429
430        // Independent adapters: OpenAI / Gemini / Mistral / Cohere
431        if provider.is_independent() {
432            let adapter: Box<dyn ChatApi> = match provider {
433                Provider::OpenAI => {
434                    if let Some(ref k) = opts.api_key {
435                        let inner =
436                            OpenAiAdapter::new_with_overrides(k.clone(), opts.base_url.clone())?;
437                        Box::new(inner)
438                    } else {
439                        let inner = OpenAiAdapter::new()?;
440                        Box::new(inner)
441                    }
442                }
443                Provider::Gemini => {
444                    if let Some(ref k) = opts.api_key {
445                        let inner =
446                            GeminiAdapter::new_with_overrides(k.clone(), opts.base_url.clone())?;
447                        Box::new(inner)
448                    } else {
449                        let inner = GeminiAdapter::new()?;
450                        Box::new(inner)
451                    }
452                }
453                Provider::Mistral => {
454                    if opts.api_key.is_some() || opts.base_url.is_some() {
455                        let inner = MistralAdapter::new_with_overrides(
456                            opts.api_key.clone(),
457                            opts.base_url.clone(),
458                        )?;
459                        Box::new(inner)
460                    } else {
461                        let inner = MistralAdapter::new()?;
462                        Box::new(inner)
463                    }
464                }
465                Provider::Cohere => {
466                    if let Some(ref k) = opts.api_key {
467                        let inner =
468                            CohereAdapter::new_with_overrides(k.clone(), opts.base_url.clone())?;
469                        Box::new(inner)
470                    } else {
471                        let inner = CohereAdapter::new()?;
472                        Box::new(inner)
473                    }
474                }
475                _ => unreachable!(),
476            };
477            return Ok(AiClient {
478                provider,
479                adapter,
480                metrics: Arc::new(NoopMetrics::new()),
481                connection_options: Some(opts),
482                custom_default_chat_model: None,
483                custom_default_multimodal_model: None,
484                backpressure: None,
485                failover_providers: None,
486                #[cfg(feature = "routing_mvp")]
487                routing_array: None,
488                #[cfg(feature = "interceptors")]
489                interceptor_pipeline: None,
490            });
491        }
492
493        // Simple config-driven without overrides
494        let mut client = AiClient::new(provider)?;
495        if let Some(ref k) = opts.api_key {
496            let override_adapter: Option<Box<dyn ChatApi>> = match provider {
497                Provider::Groq => Some(Box::new(GenericAdapter::new_with_api_key(
498                    ProviderConfigs::groq(),
499                    Some(k.clone()),
500                )?)),
501                Provider::XaiGrok => Some(Box::new(GenericAdapter::new_with_api_key(
502                    ProviderConfigs::xai_grok(),
503                    Some(k.clone()),
504                )?)),
505                Provider::Ollama => Some(Box::new(GenericAdapter::new_with_api_key(
506                    ProviderConfigs::ollama(),
507                    Some(k.clone()),
508                )?)),
509                Provider::DeepSeek => Some(Box::new(GenericAdapter::new_with_api_key(
510                    ProviderConfigs::deepseek(),
511                    Some(k.clone()),
512                )?)),
513                Provider::Qwen => Some(Box::new(GenericAdapter::new_with_api_key(
514                    ProviderConfigs::qwen(),
515                    Some(k.clone()),
516                )?)),
517                Provider::BaiduWenxin => Some(Box::new(GenericAdapter::new_with_api_key(
518                    ProviderConfigs::baidu_wenxin(),
519                    Some(k.clone()),
520                )?)),
521                Provider::TencentHunyuan => Some(Box::new(GenericAdapter::new_with_api_key(
522                    ProviderConfigs::tencent_hunyuan(),
523                    Some(k.clone()),
524                )?)),
525                Provider::IflytekSpark => Some(Box::new(GenericAdapter::new_with_api_key(
526                    ProviderConfigs::iflytek_spark(),
527                    Some(k.clone()),
528                )?)),
529                Provider::Moonshot => Some(Box::new(GenericAdapter::new_with_api_key(
530                    ProviderConfigs::moonshot(),
531                    Some(k.clone()),
532                )?)),
533                Provider::Anthropic => Some(Box::new(GenericAdapter::new_with_api_key(
534                    ProviderConfigs::anthropic(),
535                    Some(k.clone()),
536                )?)),
537                Provider::AzureOpenAI => Some(Box::new(GenericAdapter::new_with_api_key(
538                    ProviderConfigs::azure_openai(),
539                    Some(k.clone()),
540                )?)),
541                Provider::HuggingFace => Some(Box::new(GenericAdapter::new_with_api_key(
542                    ProviderConfigs::huggingface(),
543                    Some(k.clone()),
544                )?)),
545                Provider::TogetherAI => Some(Box::new(GenericAdapter::new_with_api_key(
546                    ProviderConfigs::together_ai(),
547                    Some(k.clone()),
548                )?)),
549                _ => None,
550            };
551            if let Some(a) = override_adapter {
552                client.adapter = a;
553            }
554        }
555        client.connection_options = Some(opts);
556        Ok(client)
557    }
558
559    pub fn connection_options(&self) -> Option<&ConnectionOptions> {
560        self.connection_options.as_ref()
561    }
562
563    /// Create a new AI client builder
564    ///
565    /// The builder pattern allows more flexible client configuration:
566    /// - Automatic environment variable detection
567    /// - Support for custom base_url and proxy
568    /// - Support for custom timeout and connection pool configuration
569    ///
570    /// # Arguments
571    /// * `provider` - The AI model provider to use
572    ///
573    /// # Returns
574    /// * `AiClientBuilder` - Builder instance
575    ///
576    /// # Example
577    /// ```rust
578    /// use ai_lib::{AiClient, Provider};
579    ///
580    /// // Simplest usage - automatic environment variable detection
581    /// let client = AiClient::builder(Provider::Groq).build()?;
582    ///
583    /// // Custom base_url and proxy
584    /// let client = AiClient::builder(Provider::Groq)
585    ///     .with_base_url("https://custom.groq.com")
586    ///     .with_proxy(Some("http://proxy.example.com:8080"))
587    ///     .build()?;
588    /// # Ok::<(), ai_lib::AiLibError>(())
589    /// ```
590    pub fn builder(provider: Provider) -> AiClientBuilder {
591        AiClientBuilder::new(provider)
592    }
593
594    /// Create AiClient with injected metrics implementation
595    pub fn new_with_metrics(
596        provider: Provider,
597        metrics: Arc<dyn Metrics>,
598    ) -> Result<Self, AiLibError> {
599        let adapter: Box<dyn ChatApi> = match provider {
600            Provider::Groq => Box::new(GenericAdapter::new(ProviderConfigs::groq())?),
601            Provider::XaiGrok => Box::new(GenericAdapter::new(ProviderConfigs::xai_grok())?),
602            Provider::Ollama => Box::new(GenericAdapter::new(ProviderConfigs::ollama())?),
603            Provider::DeepSeek => Box::new(GenericAdapter::new(ProviderConfigs::deepseek())?),
604            Provider::Qwen => Box::new(GenericAdapter::new(ProviderConfigs::qwen())?),
605            Provider::Anthropic => Box::new(GenericAdapter::new(ProviderConfigs::anthropic())?),
606            Provider::BaiduWenxin => {
607                Box::new(GenericAdapter::new(ProviderConfigs::baidu_wenxin())?)
608            }
609            Provider::TencentHunyuan => {
610                Box::new(GenericAdapter::new(ProviderConfigs::tencent_hunyuan())?)
611            }
612            Provider::IflytekSpark => {
613                Box::new(GenericAdapter::new(ProviderConfigs::iflytek_spark())?)
614            }
615            Provider::Moonshot => Box::new(GenericAdapter::new(ProviderConfigs::moonshot())?),
616            Provider::AzureOpenAI => {
617                Box::new(GenericAdapter::new(ProviderConfigs::azure_openai())?)
618            }
619            Provider::HuggingFace => Box::new(GenericAdapter::new(ProviderConfigs::huggingface())?),
620            Provider::TogetherAI => Box::new(GenericAdapter::new(ProviderConfigs::together_ai())?),
621            Provider::OpenRouter => Box::new(GenericAdapter::new(ProviderConfigs::openrouter())?),
622            Provider::Replicate => Box::new(GenericAdapter::new(ProviderConfigs::replicate())?),
623            Provider::ZhipuAI => Box::new(GenericAdapter::new(ProviderConfigs::zhipu_ai())?),
624            Provider::MiniMax => Box::new(GenericAdapter::new(ProviderConfigs::minimax())?),
625            Provider::OpenAI => Box::new(OpenAiAdapter::new()?),
626            Provider::Gemini => Box::new(GeminiAdapter::new()?),
627            Provider::Mistral => Box::new(MistralAdapter::new()?),
628            Provider::Cohere => Box::new(CohereAdapter::new()?),
629            Provider::Perplexity => Box::new(PerplexityAdapter::new()?),
630            Provider::AI21 => Box::new(AI21Adapter::new()?),
631        };
632
633        Ok(Self {
634            provider,
635            adapter,
636            metrics,
637            connection_options: None,
638            custom_default_chat_model: None,
639            custom_default_multimodal_model: None,
640            backpressure: None,
641            failover_providers: None,
642            #[cfg(feature = "routing_mvp")]
643            routing_array: None,
644            #[cfg(feature = "interceptors")]
645            interceptor_pipeline: None,
646        })
647    }
648
649    /// Set metrics implementation on client
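    ///
    /// Example (uses the built-in no-op implementation; assumes the `ai_lib::metrics` module
    /// path is public; substitute your own `Metrics` implementation in real use):
    /// ```rust
    /// use std::sync::Arc;
    /// use ai_lib::{AiClient, Provider};
    ///
    /// let client = AiClient::new(Provider::Groq)?
    ///     .with_metrics(Arc::new(ai_lib::metrics::NoopMetrics::new()));
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```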
650    pub fn with_metrics(mut self, metrics: Arc<dyn Metrics>) -> Self {
651        self.metrics = metrics;
652        self
653    }
654
655    /// Send chat completion request
656    ///
657    /// This method supports multiple routing and failover strategies that work together:
658    ///
659    /// ## Execution Flow
660    /// 1. **Routing (if enabled)**: If `request.model == "__route__"` and `routing_mvp` feature is enabled,
661    ///    the client will select the best available model from the configured `ModelArray` based on
662    ///    health checks and load balancing strategy.
663    /// 2. **Request Execution**: The request is sent to the current provider with the selected model.
664    /// 3. **Failover (if enabled)**: If the request fails with a retryable error (network, timeout, 
665    ///    rate limit, 5xx), the client will automatically try the configured failover providers
666    ///    in order, preserving the routing selection.
667    ///
668    /// ## Feature Interaction
669    /// - **Routing + Failover**: When both are configured, routing selection is preserved during
670    ///   failover attempts. The failover providers will use the same model selection.
671    /// - **Interceptors**: Applied after routing but before failover, allowing for request/response
672    ///   modification and monitoring.
673    ///
674    /// ## Examples
675    /// ```rust
676    /// use ai_lib::{AiClient, Provider, ChatCompletionRequest, Message, Role};
677    /// use ai_lib::types::common::Content;
678    ///
679    /// // Basic usage with failover
680    /// let client = AiClient::new(Provider::OpenAI)?
681    ///     .with_failover(vec![Provider::Anthropic, Provider::Groq]);
682    ///
683    /// // With routing (requires routing_mvp feature)
684    /// #[cfg(feature = "routing_mvp")]
685    /// {
686    ///     let mut array = ai_lib::provider::models::ModelArray::new("production");
687    ///     // ... configure array with endpoints
688    ///     let client = client.with_routing_array(array);
689    /// }
690    ///
691    /// // Request with routing (will select best model from array)
692    /// let request = ChatCompletionRequest::new(
693    ///     "__route__".to_string(),  // Special sentinel for routing
694    ///     vec![Message {
695    ///         role: Role::User,
696    ///         content: Content::Text("Hello".to_string()),
697    ///         function_call: None,
698    ///     }],
699    /// );
700    /// # Ok::<(), ai_lib::AiLibError>(())
701    /// ```
702    ///
703    /// # Arguments
704    /// * `request` - Chat completion request
705    ///
706    /// # Returns
707    /// * `Result<ChatCompletionResponse, AiLibError>` - Response on success, error on failure
708    #[allow(unused_mut)]
709    pub async fn chat_completion(
710        &self,
711        request: ChatCompletionRequest,
712    ) -> Result<ChatCompletionResponse, AiLibError> {
713        // Acquire backpressure permit if configured
714        let _bp_permit: Option<BackpressurePermit> = if let Some(ctrl) = &self.backpressure {
715            match ctrl.acquire_permit().await {
716                Ok(p) => Some(p),
717                Err(_) => {
718                    return Err(AiLibError::RateLimitExceeded(
719                        "Backpressure: no permits available".to_string(),
720                    ))
721                }
722            }
723        } else {
724            None
725        };
726        // Step 1: Apply routing if configured and model is "__route__"
727        // Note: processed_request needs to be mut because we modify it in the routing_mvp block
728        let mut processed_request = request;
729        #[cfg(feature = "routing_mvp")]
730        {
731            if processed_request.model == "__route__" {
732                let _ = self.metrics.incr_counter("routing_mvp.request", 1).await;
                let mut chosen = self.provider.default_chat_model().to_string();
734                if let Some(arr) = &self.routing_array {
735                    let mut arr_clone = arr.clone();
736                    if let Some(ep) = arr_clone.select_endpoint() {
737                        match crate::provider::utils::health_check(&ep.url).await {
738                            Ok(()) => {
739                                let _ = self.metrics.incr_counter("routing_mvp.selected", 1).await;
740                                chosen = ep.model_name.clone();
741                            }
742                            Err(_) => {
743                                let _ = self
744                                    .metrics
745                                    .incr_counter("routing_mvp.health_fail", 1)
746                                    .await;
747                                chosen = self.provider.default_chat_model().to_string();
748                                let _ = self
749                                    .metrics
750                                    .incr_counter("routing_mvp.fallback_default", 1)
751                                    .await;
752                            }
753                        }
754                    } else {
755                        let _ = self
756                            .metrics
757                            .incr_counter("routing_mvp.no_endpoint", 1)
758                            .await;
759                    }
760                } else {
761                    let _ = self
762                        .metrics
763                        .incr_counter("routing_mvp.missing_array", 1)
764                        .await;
765                }
766                processed_request.model = chosen;
767            }
768        }
769
770        // Step 2: Execute request with potential failover
        let execute_request = || async {
            #[cfg(feature = "interceptors")]
            if let Some(p) = &self.interceptor_pipeline {
                let ctx = crate::interceptors::RequestContext {
                    provider: format!("{:?}", self.provider).to_lowercase(),
                    model: processed_request.model.clone(),
                };
                return p
                    .execute(&ctx, &processed_request, || {
                        self.adapter.chat_completion(processed_request.clone())
                    })
                    .await;
            }
            self.adapter.chat_completion(processed_request.clone()).await
        };
784
785        match execute_request().await {
786            Ok(resp) => Ok(resp),
787            Err(e) => {
788                // Step 3: Try failover if configured and error is retryable
789                if let Some(chain) = &self.failover_providers {
790                    if e.is_retryable() || matches!(e, AiLibError::TimeoutError(_)) {
791                        for p in chain {
792                            // Skip if same as current
793                            if *p == self.provider { continue; }
794                            let _ = self.metrics.incr_counter("failover.attempts", 1).await;
795                            
                            // Create a failover client for this provider (default env-based configuration, shared metrics)
797                            let failover_client = AiClient::new_with_metrics(*p, self.metrics.clone())?;
798                            
799                            // Apply same routing logic to failover request if needed
800                            let mut failover_request = processed_request.clone();
801                            #[cfg(feature = "routing_mvp")]
802                            {
803                                // For failover, we don't re-apply routing since the model was already chosen
804                                // or we use the failover provider's default model
805                                if failover_request.model == "__route__" {
806                                    failover_request.model = p.default_chat_model().to_string();
807                                }
808                            }
809                            
810                            match failover_client.adapter.chat_completion(failover_request).await {
811                                Ok(r) => {
812                                    let _ = self.metrics.incr_counter("failover.success", 1).await;
813                                    return Ok(r);
814                                }
815                                Err(e2) => {
816                                    let _ = self.metrics.incr_counter("failover.error", 1).await;
817                                    // Continue to next provider on retryable
818                                    if !(e2.is_retryable() || matches!(e2, AiLibError::TimeoutError(_))) {
819                                        return Err(e2);
820                                    }
821                                }
822                            }
823                        }
824                    }
825                }
826                Err(e)
827            }
828        }
829    }
830
831    /// Streaming chat completion request
832    ///
833    /// This method provides the same routing and failover capabilities as `chat_completion()`,
834    /// but returns a streaming response for real-time processing.
835    ///
836    /// ## Execution Flow
837    /// 1. **Routing (if enabled)**: If `request.model == "__route__"` and `routing_mvp` feature is enabled,
838    ///    the client will select the best available model from the configured `ModelArray` based on
839    ///    health checks and load balancing strategy.
840    /// 2. **Stream Request Execution**: The streaming request is sent to the current provider with the selected model.
841    /// 3. **Failover (if enabled)**: If the stream request fails with a retryable error (network, timeout, 
842    ///    rate limit, 5xx), the client will automatically try the configured failover providers
843    ///    in order, preserving the routing selection.
844    ///
845    /// ## Feature Interaction
846    /// - **Routing + Failover**: When both are configured, routing selection is preserved during
847    ///   failover attempts. The failover providers will use the same model selection.
848    /// - **Interceptors**: Applied after routing but before failover, allowing for request/response
849    ///   modification and monitoring.
850    /// - **Backpressure**: The configured backpressure controller is applied to the final stream.
851    ///
852    /// ## Examples
853    /// ```rust
854    /// use ai_lib::{AiClient, Provider, ChatCompletionRequest, Message, Role};
855    /// use ai_lib::types::common::Content;
856    /// use futures::stream::StreamExt;
857    ///
858    /// # async fn example() -> Result<(), ai_lib::AiLibError> {
859    /// // Basic usage with failover
860    /// let client = AiClient::new(Provider::OpenAI)?
861    ///     .with_failover(vec![Provider::Anthropic, Provider::Groq]);
862    ///
863    /// // With routing (requires routing_mvp feature)
864    /// #[cfg(feature = "routing_mvp")]
865    /// {
866    ///     let mut array = ai_lib::provider::models::ModelArray::new("production");
867    ///     // ... configure array with endpoints
868    ///     let client = client.with_routing_array(array);
869    /// }
870    ///
871    /// // Streaming request with routing (will select best model from array)
872    /// let request = ChatCompletionRequest::new(
873    ///     "__route__".to_string(),  // Special sentinel for routing
874    ///     vec![Message {
875    ///         role: Role::User,
876    ///         content: Content::Text("Hello".to_string()),
877    ///         function_call: None,
878    ///     }],
879    /// );
880    ///
881    /// let mut stream = client.chat_completion_stream(request).await?;
882    /// while let Some(chunk) = stream.next().await {
883    ///     match chunk {
884    ///         Ok(chunk) => println!("Received: {:?}", chunk),
885    ///         Err(e) => eprintln!("Stream error: {}", e),
886    ///     }
887    /// }
888    /// # Ok(())
889    /// # }
890    /// ```
891    ///
892    /// # Arguments
893    /// * `request` - Chat completion request
894    ///
895    /// # Returns
896    /// * `Result<impl Stream<Item = Result<ChatCompletionChunk, AiLibError>>, AiLibError>` - Stream response on success
897    #[allow(unused_mut)]
898    pub async fn chat_completion_stream(
899        &self,
900        mut request: ChatCompletionRequest,
901    ) -> Result<
902        Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>,
903        AiLibError,
904    > {
905        request.stream = Some(true);
906        // Acquire backpressure permit if configured and hold it for the lifetime of the stream
907        let bp_permit: Option<BackpressurePermit> = if let Some(ctrl) = &self.backpressure {
908            match ctrl.acquire_permit().await {
909                Ok(p) => Some(p),
910                Err(_) => {
911                    return Err(AiLibError::RateLimitExceeded(
912                        "Backpressure: no permits available".to_string(),
913                    ))
914                }
915            }
916        } else {
917            None
918        };
919        // Step 1: Apply routing if configured and model is "__route__"
920        // Note: processed_request needs to be mut because we modify it in the routing_mvp block
921        let mut processed_request = request;
922        #[cfg(feature = "routing_mvp")]
923        {
924            if processed_request.model == "__route__" {
925                let _ = self.metrics.incr_counter("routing_mvp.request", 1).await;
                let mut chosen = self.provider.default_chat_model().to_string();
927                if let Some(arr) = &self.routing_array {
928                    let mut arr_clone = arr.clone();
929                    if let Some(ep) = arr_clone.select_endpoint() {
930                        match crate::provider::utils::health_check(&ep.url).await {
931                            Ok(()) => {
932                                let _ = self.metrics.incr_counter("routing_mvp.selected", 1).await;
933                                chosen = ep.model_name.clone();
934                            }
935                            Err(_) => {
936                                let _ = self
937                                    .metrics
938                                    .incr_counter("routing_mvp.health_fail", 1)
939                                    .await;
940                                chosen = self.provider.default_chat_model().to_string();
941                                let _ = self
942                                    .metrics
943                                    .incr_counter("routing_mvp.fallback_default", 1)
944                                    .await;
945                            }
946                        }
947                    } else {
948                        let _ = self
949                            .metrics
950                            .incr_counter("routing_mvp.no_endpoint", 1)
951                            .await;
952                    }
953                } else {
954                    let _ = self
955                        .metrics
956                        .incr_counter("routing_mvp.missing_array", 1)
957                        .await;
958                }
959                processed_request.model = chosen;
960            }
961        }
962
963        // Step 2: Execute stream request with potential failover
        let execute_stream = || async {
            #[cfg(feature = "interceptors")]
            if let Some(p) = &self.interceptor_pipeline {
                let ctx = crate::interceptors::RequestContext {
                    provider: format!("{:?}", self.provider).to_lowercase(),
                    model: processed_request.model.clone(),
                };
                // Notify interceptors of the outgoing request, then run the core stream call.
                for ic in &p.interceptors {
                    ic.on_request(&ctx, &processed_request).await;
                }
                // Clone so `processed_request` remains available for the failover path below.
                return self
                    .adapter
                    .chat_completion_stream(processed_request.clone())
                    .await;
            }
            self.adapter.chat_completion_stream(processed_request.clone()).await
        };
979
980        match execute_stream().await {
981            Ok(inner) => {
                let cs = ControlledStream::new_with_bp(inner, None, bp_permit);
                Ok(Box::new(cs))
984            }
985            Err(e) => {
986                // Step 3: Try failover if configured and error is retryable
987                if let Some(chain) = &self.failover_providers {
988                    if e.is_retryable() || matches!(e, AiLibError::TimeoutError(_)) {
989                        for p in chain {
990                            if *p == self.provider { continue; }
991                            let _ = self.metrics.incr_counter("failover.attempts", 1).await;
992                            
                            // Create a failover client for this provider (default env-based configuration, shared metrics)
994                            let failover_client = AiClient::new_with_metrics(*p, self.metrics.clone())?;
995                            
996                            // Apply same routing logic to failover request if needed
997                            let mut failover_request = processed_request.clone();
998                            #[cfg(feature = "routing_mvp")]
999                            {
1000                                // For failover, we don't re-apply routing since the model was already chosen
1001                                // or we use the failover provider's default model
1002                                if failover_request.model == "__route__" {
1003                                    failover_request.model = p.default_chat_model().to_string();
1004                                }
1005                            }
1006                            
1007                            match failover_client.adapter.chat_completion_stream(failover_request).await {
1008                                Ok(inner) => {
1009                                    let _ = self.metrics.incr_counter("failover.success", 1).await;
1010                                    let cs = ControlledStream::new_with_bp(inner, None, bp_permit);
1011                                    return Ok(Box::new(cs));
1012                                }
1013                                Err(e2) => {
1014                                    let _ = self.metrics.incr_counter("failover.error", 1).await;
1015                                    if !(e2.is_retryable() || matches!(e2, AiLibError::TimeoutError(_))) {
1016                                        return Err(e2);
1017                                    }
1018                                }
1019                            }
1020                        }
1021                    }
1022                }
1023                Err(e)
1024            }
1025        }
1026    }
1027
1028    /// Streaming chat completion request with cancel control
1029    ///
1030    /// # Arguments
1031    /// * `request` - Chat completion request
1032    ///
1033    /// # Returns
1034    /// * `Result<(impl Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin, CancelHandle), AiLibError>` - Returns streaming response and cancel handle on success
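    ///
    /// # Example
    /// Illustrative sketch; the returned `CancelHandle` can be used to stop the stream early.
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    /// use futures::stream::StreamExt;
    ///
    /// # async fn example() -> Result<(), ai_lib::AiLibError> {
    /// let client = AiClient::new(Provider::Groq)?;
    /// let request = client.build_simple_request("Hello");
    /// let (mut stream, _cancel_handle) = client.chat_completion_stream_with_cancel(request).await?;
    /// while let Some(chunk) = stream.next().await {
    ///     println!("chunk: {:?}", chunk?);
    /// }
    /// # Ok(())
    /// # }
    /// ```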
1035    pub async fn chat_completion_stream_with_cancel(
1036        &self,
1037        mut request: ChatCompletionRequest,
1038    ) -> Result<
1039        (
1040            Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>,
1041            CancelHandle,
1042        ),
1043        AiLibError,
1044    > {
1045        request.stream = Some(true);
1046        // Acquire backpressure permit if configured and hold it for the lifetime of the stream
1047        let bp_permit: Option<BackpressurePermit> = if let Some(ctrl) = &self.backpressure {
1048            match ctrl.acquire_permit().await {
1049                Ok(p) => Some(p),
1050                Err(_) => {
1051                    return Err(AiLibError::RateLimitExceeded(
1052                        "Backpressure: no permits available".to_string(),
1053                    ))
1054                }
1055            }
1056        } else {
1057            None
1058        };
1059        let stream = self.adapter.chat_completion_stream(request).await?;
1060        let (cancel_tx, cancel_rx) = oneshot::channel();
1061        let cancel_handle = CancelHandle {
1062            sender: Some(cancel_tx),
1063        };
1064
1065        let controlled_stream = ControlledStream::new_with_bp(stream, Some(cancel_rx), bp_permit);
1066        Ok((Box::new(controlled_stream), cancel_handle))
1067    }
1068
1069    /// Batch chat completion requests
1070    ///
1071    /// # Arguments
1072    /// * `requests` - List of chat completion requests
1073    /// * `concurrency_limit` - Maximum concurrent request count (None means unlimited)
1074    ///
1075    /// # Returns
1076    /// * `Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError>` - Returns response results for all requests
1077    ///
1078    /// # Example
1079    /// ```rust
1080    /// use ai_lib::{AiClient, Provider, ChatCompletionRequest, Message, Role};
1081    /// use ai_lib::types::common::Content;
1082    ///
1083    /// #[tokio::main]
1084    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
1085    ///     let client = AiClient::new(Provider::Groq)?;
1086    ///     
1087    ///     let requests = vec![
1088    ///         ChatCompletionRequest::new(
1089    ///             "llama3-8b-8192".to_string(),
1090    ///             vec![Message {
1091    ///                 role: Role::User,
1092    ///                 content: Content::Text("Hello".to_string()),
1093    ///                 function_call: None,
1094    ///             }],
1095    ///         ),
1096    ///         ChatCompletionRequest::new(
1097    ///             "llama3-8b-8192".to_string(),
1098    ///             vec![Message {
1099    ///                 role: Role::User,
1100    ///                 content: Content::Text("How are you?".to_string()),
1101    ///                 function_call: None,
1102    ///             }],
1103    ///         ),
1104    ///     ];
1105    ///     
1106    ///     // Limit concurrency to 5
1107    ///     let responses = client.chat_completion_batch(requests, Some(5)).await?;
1108    ///     
1109    ///     for (i, response) in responses.iter().enumerate() {
1110    ///         match response {
1111    ///             Ok(resp) => println!("Request {}: {}", i, resp.choices[0].message.content.as_text()),
1112    ///             Err(e) => println!("Request {} failed: {}", i, e),
1113    ///         }
1114    ///     }
1115    ///     
1116    ///     Ok(())
1117    /// }
1118    /// ```
1119    pub async fn chat_completion_batch(
1120        &self,
1121        requests: Vec<ChatCompletionRequest>,
1122        concurrency_limit: Option<usize>,
1123    ) -> Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError> {
1124        self.adapter
1125            .chat_completion_batch(requests, concurrency_limit)
1126            .await
1127    }
1128
1129    /// Smart batch processing: automatically choose processing strategy based on request count
1130    ///
1131    /// # Arguments
1132    /// * `requests` - List of chat completion requests
1133    ///
1134    /// # Returns
1135    /// * `Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError>` - Returns response results for all requests
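    ///
    /// # Example
    /// Illustrative sketch using the convenience request builder:
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// # async fn example() -> Result<(), ai_lib::AiLibError> {
    /// let client = AiClient::new(Provider::Groq)?;
    /// let requests = vec![
    ///     client.build_simple_request("First question"),
    ///     client.build_simple_request("Second question"),
    /// ];
    /// // Two requests fall under the small-batch threshold, so no concurrency cap is applied.
    /// let responses = client.chat_completion_batch_smart(requests).await?;
    /// assert_eq!(responses.len(), 2);
    /// # Ok(())
    /// # }
    /// ```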
1136    pub async fn chat_completion_batch_smart(
1137        &self,
1138        requests: Vec<ChatCompletionRequest>,
1139    ) -> Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError> {
        // Small batches (<= 3 requests) run without a concurrency cap; larger batches are capped at 10 concurrent requests.
        let concurrency_limit = if requests.len() <= 3 { None } else { Some(10) };
1142        self.chat_completion_batch(requests, concurrency_limit)
1143            .await
1144    }
1145
1197    /// Get list of supported models
1198    ///
1199    /// # Returns
1200    /// * `Result<Vec<String>, AiLibError>` - Returns model list on success, error on failure
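    ///
    /// # Example
    /// Illustrative usage (requires valid provider credentials when actually executed):
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// # async fn example() -> Result<(), ai_lib::AiLibError> {
    /// let client = AiClient::new(Provider::Groq)?;
    /// let models = client.list_models().await?;
    /// println!("Available models: {:?}", models);
    /// # Ok(())
    /// # }
    /// ```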
1201    pub async fn list_models(&self) -> Result<Vec<String>, AiLibError> {
1202        self.adapter.list_models().await
1203    }
1204
1205    /// Switch AI model provider
1206    ///
1207    /// # Arguments
1208    /// * `provider` - New provider
1209    ///
1210    /// # Returns
1211    /// * `Result<(), AiLibError>` - Returns () on success, error on failure
1212    ///
1213    /// # Example
1214    /// ```rust
1215    /// use ai_lib::{AiClient, Provider};
1216    ///
1217    /// let mut client = AiClient::new(Provider::Groq)?;
1218    /// // Switch from Groq to Groq (demonstrating switch functionality)
1219    /// client.switch_provider(Provider::Groq)?;
1220    /// # Ok::<(), ai_lib::AiLibError>(())
1221    /// ```
1222    pub fn switch_provider(&mut self, provider: Provider) -> Result<(), AiLibError> {
1223        let new_adapter: Box<dyn ChatApi> = match provider {
1224            Provider::Groq => Box::new(GenericAdapter::new(ProviderConfigs::groq())?),
1225            Provider::XaiGrok => Box::new(GenericAdapter::new(ProviderConfigs::xai_grok())?),
1226            Provider::Ollama => Box::new(GenericAdapter::new(ProviderConfigs::ollama())?),
1227            Provider::DeepSeek => Box::new(GenericAdapter::new(ProviderConfigs::deepseek())?),
1228            Provider::Qwen => Box::new(GenericAdapter::new(ProviderConfigs::qwen())?),
1229            Provider::OpenAI => Box::new(OpenAiAdapter::new()?),
1230            Provider::Anthropic => Box::new(GenericAdapter::new(ProviderConfigs::anthropic())?),
1231            Provider::BaiduWenxin => {
1232                Box::new(GenericAdapter::new(ProviderConfigs::baidu_wenxin())?)
1233            }
1234            Provider::TencentHunyuan => {
1235                Box::new(GenericAdapter::new(ProviderConfigs::tencent_hunyuan())?)
1236            }
1237            Provider::IflytekSpark => {
1238                Box::new(GenericAdapter::new(ProviderConfigs::iflytek_spark())?)
1239            }
1240            Provider::Moonshot => Box::new(GenericAdapter::new(ProviderConfigs::moonshot())?),
1241            Provider::Gemini => Box::new(GeminiAdapter::new()?),
1242            Provider::AzureOpenAI => {
1243                Box::new(GenericAdapter::new(ProviderConfigs::azure_openai())?)
1244            }
1245            Provider::HuggingFace => Box::new(GenericAdapter::new(ProviderConfigs::huggingface())?),
1246            Provider::TogetherAI => Box::new(GenericAdapter::new(ProviderConfigs::together_ai())?),
1247            Provider::OpenRouter => Box::new(GenericAdapter::new(ProviderConfigs::openrouter())?),
1248            Provider::Replicate => Box::new(GenericAdapter::new(ProviderConfigs::replicate())?),
1249            Provider::ZhipuAI => Box::new(GenericAdapter::new(ProviderConfigs::zhipu_ai())?),
1250            Provider::MiniMax => Box::new(GenericAdapter::new(ProviderConfigs::minimax())?),
1251            Provider::Mistral => Box::new(MistralAdapter::new()?),
1252            Provider::Cohere => Box::new(CohereAdapter::new()?),
1253            Provider::Perplexity => Box::new(PerplexityAdapter::new()?),
1254            Provider::AI21 => Box::new(AI21Adapter::new()?),
1255            // Provider::Bedrock => Box::new(BedrockAdapter::new()?),
1256        };
1257
1258        self.provider = provider;
1259        self.adapter = new_adapter;
1260        Ok(())
1261    }
1262
1263    /// Get current provider
1264    pub fn current_provider(&self) -> Provider {
1265        self.provider
1266    }
1267
1268    /// Convenience helper: construct a request with the provider's default chat model.
1269    /// This does NOT send the request.
1270    /// Uses custom default model if set via AiClientBuilder, otherwise uses provider default.
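    ///
    /// Illustrative example (marked `no_run`; assumes the provider's API key is configured in the environment):
    /// ```rust,no_run
    /// use ai_lib::{AiClient, Provider};
    ///
    /// #[tokio::main]
    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
    ///     let client = AiClient::new(Provider::Groq)?;
    ///     let request = client.build_simple_request("Explain Rust lifetimes in one sentence");
    ///     let response = client.chat_completion(request).await?;
    ///     println!("{}", response.first_text()?);
    ///     Ok(())
    /// }
    /// ```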
1271    pub fn build_simple_request<S: Into<String>>(&self, prompt: S) -> ChatCompletionRequest {
1272        let model = self
1273            .custom_default_chat_model
1274            .clone()
1275            .unwrap_or_else(|| self.provider.default_chat_model().to_string());
1276
1277        ChatCompletionRequest::new(
1278            model,
1279            vec![crate::types::Message {
1280                role: crate::types::Role::User,
1281                content: crate::types::common::Content::Text(prompt.into()),
1282                function_call: None,
1283            }],
1284        )
1285    }
1286
1287    /// Convenience helper: construct a request with an explicitly specified chat model.
1288    /// This does NOT send the request.
1289    pub fn build_simple_request_with_model<S: Into<String>>(
1290        &self,
1291        prompt: S,
1292        model: S,
1293    ) -> ChatCompletionRequest {
1294        ChatCompletionRequest::new(
1295            model.into(),
1296            vec![crate::types::Message {
1297                role: crate::types::Role::User,
1298                content: crate::types::common::Content::Text(prompt.into()),
1299                function_call: None,
1300            }],
1301        )
1302    }
1303
1304    /// Convenience helper: construct a request with the provider's default multimodal model.
1305    /// This does NOT send the request.
1306    /// Uses custom default model if set via AiClientBuilder, otherwise uses provider default.
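    ///
    /// Returns `AiLibError::ConfigurationError` if neither a custom default nor a
    /// provider default multimodal model is available.
    ///
    /// Illustrative example (whether a multimodal default exists depends on the provider):
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// let client = AiClient::new(Provider::Groq)?;
    /// match client.build_multimodal_request("Describe this image") {
    ///     Ok(_request) => println!("multimodal request built"),
    ///     Err(e) => eprintln!("no multimodal model available: {}", e),
    /// }
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```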
1307    pub fn build_multimodal_request<S: Into<String>>(
1308        &self,
1309        prompt: S,
1310    ) -> Result<ChatCompletionRequest, AiLibError> {
1311        let model = self
1312            .custom_default_multimodal_model
1313            .clone()
1314            .or_else(|| {
1315                self.provider
1316                    .default_multimodal_model()
1317                    .map(|s| s.to_string())
1318            })
1319            .ok_or_else(|| {
1320                AiLibError::ConfigurationError(format!(
1321                    "No multimodal model available for provider {:?}",
1322                    self.provider
1323                ))
1324            })?;
1325
1326        Ok(ChatCompletionRequest::new(
1327            model,
1328            vec![crate::types::Message {
1329                role: crate::types::Role::User,
1330                content: crate::types::common::Content::Text(prompt.into()),
1331                function_call: None,
1332            }],
1333        ))
1334    }
1335
1336    /// Convenience helper: construct a request with an explicitly specified multimodal model.
1337    /// This does NOT send the request.
1338    pub fn build_multimodal_request_with_model<S: Into<String>>(
1339        &self,
1340        prompt: S,
1341        model: S,
1342    ) -> ChatCompletionRequest {
1343        ChatCompletionRequest::new(
1344            model.into(),
1345            vec![crate::types::Message {
1346                role: crate::types::Role::User,
1347                content: crate::types::common::Content::Text(prompt.into()),
1348                function_call: None,
1349            }],
1350        )
1351    }
1352
1353    /// One-shot helper: create a client for `provider`, send a single user prompt using the
1354    /// default chat model, and return plain text content (first choice).
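    ///
    /// Illustrative example (marked `no_run`; requires the provider's API key in the environment):
    /// ```rust,no_run
    /// use ai_lib::{AiClient, Provider};
    ///
    /// #[tokio::main]
    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
    ///     let text = AiClient::quick_chat_text(Provider::Groq, "Hello!").await?;
    ///     println!("{}", text);
    ///     Ok(())
    /// }
    /// ```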
1355    pub async fn quick_chat_text<P: Into<String>>(
1356        provider: Provider,
1357        prompt: P,
1358    ) -> Result<String, AiLibError> {
1359        let client = AiClient::new(provider)?;
1360        let req = client.build_simple_request(prompt.into());
1361        let resp = client.chat_completion(req).await?;
1362        resp.first_text().map(|s| s.to_string())
1363    }
1364
1365    /// One-shot helper: create a client for `provider`, send a single user prompt using an
1366    /// explicitly specified chat model, and return plain text content (first choice).
1367    pub async fn quick_chat_text_with_model<P: Into<String>, M: Into<String>>(
1368        provider: Provider,
1369        prompt: P,
1370        model: M,
1371    ) -> Result<String, AiLibError> {
1372        let client = AiClient::new(provider)?;
1373        let req = client.build_simple_request_with_model(prompt.into(), model.into());
1374        let resp = client.chat_completion(req).await?;
1375        resp.first_text().map(|s| s.to_string())
1376    }
1377
1378    /// One-shot helper: create a client for `provider`, send a single user prompt using the
1379    /// default multimodal model, and return plain text content (first choice).
1380    pub async fn quick_multimodal_text<P: Into<String>>(
1381        provider: Provider,
1382        prompt: P,
1383    ) -> Result<String, AiLibError> {
1384        let client = AiClient::new(provider)?;
1385        let req = client.build_multimodal_request(prompt.into())?;
1386        let resp = client.chat_completion(req).await?;
1387        resp.first_text().map(|s| s.to_string())
1388    }
1389
1390    /// One-shot helper: create a client for `provider`, send a single user prompt using an
1391    /// explicitly specified multimodal model, and return plain text content (first choice).
1392    pub async fn quick_multimodal_text_with_model<P: Into<String>, M: Into<String>>(
1393        provider: Provider,
1394        prompt: P,
1395        model: M,
1396    ) -> Result<String, AiLibError> {
1397        let client = AiClient::new(provider)?;
1398        let req = client.build_multimodal_request_with_model(prompt.into(), model.into());
1399        let resp = client.chat_completion(req).await?;
1400        resp.first_text().map(|s| s.to_string())
1401    }
1402
1403    /// One-shot helper with model options: create a client for `provider`, send a single user prompt
1404    /// using specified model options, and return plain text content (first choice).
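    ///
    /// Illustrative example (marked `no_run`; assumes `ModelOptions` is re-exported at the
    /// crate root like `AiClient`, and that the model name is valid for the provider):
    /// ```rust,no_run
    /// use ai_lib::{AiClient, ModelOptions, Provider};
    ///
    /// #[tokio::main]
    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
    ///     let options = ModelOptions::new().with_chat_model("llama-3.1-8b-instant");
    ///     let text = AiClient::quick_chat_text_with_options(Provider::Groq, "Hello!", options).await?;
    ///     println!("{}", text);
    ///     Ok(())
    /// }
    /// ```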
1405    pub async fn quick_chat_text_with_options<P: Into<String>>(
1406        provider: Provider,
1407        prompt: P,
1408        options: ModelOptions,
1409    ) -> Result<String, AiLibError> {
1410        let client = AiClient::new(provider)?;
1411
1412        // Determine which model to use based on options
1413        let model = if let Some(chat_model) = options.chat_model {
1414            chat_model
1415        } else {
1416            provider.default_chat_model().to_string()
1417        };
1418
1419        let req = client.build_simple_request_with_model(prompt.into(), model);
1420        let resp = client.chat_completion(req).await?;
1421        resp.first_text().map(|s| s.to_string())
1422    }
1423
1424    /// Upload a local file via the provider's multipart endpoint and return a URL or file id.
1425    ///
1426    /// Behavior:
1427    /// - For config-driven providers, uses their configured `upload_endpoint` if available.
1428    /// - For OpenAI, posts to `{base_url}/files`.
1429    /// - Providers without a known upload endpoint return `UnsupportedFeature`.
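    ///
    /// Illustrative example (marked `no_run`; `./report.pdf` is a placeholder path and the
    /// chosen provider must expose an upload endpoint):
    /// ```rust,no_run
    /// use ai_lib::{AiClient, Provider};
    ///
    /// #[tokio::main]
    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
    ///     let client = AiClient::new(Provider::OpenAI)?;
    ///     let file_ref = client.upload_file("./report.pdf").await?;
    ///     println!("uploaded file reference: {}", file_ref);
    ///     Ok(())
    /// }
    /// ```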
1430    pub async fn upload_file(&self, path: &str) -> Result<String, AiLibError> {
1431        // Determine base_url precedence: explicit connection_options > provider default
1432        let base_url = if let Some(opts) = &self.connection_options {
1433            if let Some(b) = &opts.base_url {
1434                b.clone()
1435            } else {
1436                self.provider_default_base_url()?
1437            }
1438        } else {
1439            self.provider_default_base_url()?
1440        };
1441
1442        // Determine upload endpoint
1443        let endpoint: Option<String> = if self.provider.is_config_driven() {
1444            // Use provider default config to discover upload endpoint
1445            let cfg = self.provider.get_default_config()?;
1446            cfg.upload_endpoint.clone()
1447        } else {
1448            match self.provider {
1449                Provider::OpenAI => Some("/files".to_string()),
1450                _ => None,
1451            }
1452        };
1453
1454        let endpoint = endpoint.ok_or_else(|| {
1455            AiLibError::UnsupportedFeature(format!(
1456                "Provider {:?} does not expose an upload endpoint in OSS",
1457                self.provider
1458            ))
1459        })?;
1460
1461        // Compose URL (avoid double slashes)
1462        let upload_url = if base_url.ends_with('/') {
1463            format!("{}{}", base_url.trim_end_matches('/'), endpoint)
1464        } else {
1465            format!("{}{}", base_url, endpoint)
1466        };
1467
1468        // Perform upload using unified transport helper (uses injected transport when None)
1469        crate::provider::utils::upload_file_with_transport(None, &upload_url, path, "file").await
1470    }
1471
1472    fn provider_default_base_url(&self) -> Result<String, AiLibError> {
1473        if self.provider.is_config_driven() {
1474            Ok(self.provider.get_default_config()?.base_url)
1475        } else {
1476            match self.provider {
1477                Provider::OpenAI => Ok("https://api.openai.com/v1".to_string()),
1478                Provider::Gemini => {
1479                    Ok("https://generativelanguage.googleapis.com/v1beta".to_string())
1480                }
1481                Provider::Mistral => Ok("https://api.mistral.ai".to_string()),
1482                Provider::Cohere => Ok("https://api.cohere.ai".to_string()),
1483                _ => Err(AiLibError::ConfigurationError(
1484                    "No default base URL for provider".to_string(),
1485                )),
1486            }
1487        }
1488    }
1489}
1490
1491/// Streaming response cancel handle
1492pub struct CancelHandle {
1493    sender: Option<oneshot::Sender<()>>,
1494}
1495
1496impl CancelHandle {
1497    /// Cancel streaming response
1498    pub fn cancel(mut self) {
1499        if let Some(sender) = self.sender.take() {
1500            let _ = sender.send(());
1501        }
1502    }
1503}
1504
1505/// AI client builder with progressive custom configuration
1506///
1507/// Usage examples:
1508/// ```rust
1509/// use ai_lib::{AiClientBuilder, Provider};
1510///
1511/// // Simplest usage - automatic environment variable detection
1512/// let client = AiClientBuilder::new(Provider::Groq).build()?;
1513///
1514/// // Custom base_url and proxy
1515/// let client = AiClientBuilder::new(Provider::Groq)
1516///     .with_base_url("https://custom.groq.com")
1517///     .with_proxy(Some("http://proxy.example.com:8080"))
1518///     .build()?;
1519///
1520/// // Full custom configuration
1521/// let client = AiClientBuilder::new(Provider::Groq)
1522///     .with_base_url("https://custom.groq.com")
1523///     .with_proxy(Some("http://proxy.example.com:8080"))
1524///     .with_timeout(std::time::Duration::from_secs(60))
1525///     .with_pool_config(32, std::time::Duration::from_secs(90))
1526///     .build()?;
1527/// # Ok::<(), ai_lib::AiLibError>(())
1528/// ```
1529pub struct AiClientBuilder {
1530    provider: Provider,
1531    base_url: Option<String>,
1532    proxy_url: Option<String>,
1533    timeout: Option<std::time::Duration>,
1534    pool_max_idle: Option<usize>,
1535    pool_idle_timeout: Option<std::time::Duration>,
1536    metrics: Option<Arc<dyn Metrics>>,
1537    // Model configuration options
1538    default_chat_model: Option<String>,
1539    default_multimodal_model: Option<String>,
1540    // Resilience configuration
1541    resilience_config: ResilienceConfig,
1542    #[cfg(feature = "routing_mvp")]
1543    routing_array: Option<crate::provider::models::ModelArray>,
1544    #[cfg(feature = "interceptors")]
1545    interceptor_pipeline: Option<crate::interceptors::InterceptorPipeline>,
1546}
1547
1548impl AiClientBuilder {
1549    /// Create a new builder instance
1550    ///
1551    /// # Arguments
1552    /// * `provider` - The AI model provider to use
1553    ///
1554    /// # Returns
1555    /// * `Self` - Builder instance
1556    pub fn new(provider: Provider) -> Self {
1557        Self {
1558            provider,
1559            base_url: None,
1560            proxy_url: None,
1561            timeout: None,
1562            pool_max_idle: None,
1563            pool_idle_timeout: None,
1564            metrics: None,
1565            default_chat_model: None,
1566            default_multimodal_model: None,
1567            resilience_config: ResilienceConfig::default(),
1568            #[cfg(feature = "routing_mvp")]
1569            routing_array: None,
1570            #[cfg(feature = "interceptors")]
1571            interceptor_pipeline: None,
1572        }
1573    }
1574
1575    /// Check if provider is config-driven (uses GenericAdapter)
1576    fn is_config_driven_provider(provider: Provider) -> bool {
1577        provider.is_config_driven()
1578    }
1579
1580    /// Set custom base URL
1581    ///
1582    /// # Arguments
1583    /// * `base_url` - Custom base URL
1584    ///
1585    /// # Returns
1586    /// * `Self` - Builder instance for method chaining
1587    pub fn with_base_url(mut self, base_url: &str) -> Self {
1588        self.base_url = Some(base_url.to_string());
1589        self
1590    }
1591
1592    /// Set custom proxy URL
1593    ///
1594    /// # Arguments
1595    /// * `proxy_url` - Custom proxy URL, or None to use AI_PROXY_URL environment variable
1596    ///
1597    /// # Returns
1598    /// * `Self` - Builder instance for method chaining
1599    ///
1600    /// # Examples
1601    /// ```rust
1602    /// use ai_lib::{AiClientBuilder, Provider};
1603    ///
1604    /// // Use specific proxy URL
1605    /// let client = AiClientBuilder::new(Provider::Groq)
1606    ///     .with_proxy(Some("http://proxy.example.com:8080"))
1607    ///     .build()?;
1608    ///
1609    /// // Use AI_PROXY_URL environment variable
1610    /// let client = AiClientBuilder::new(Provider::Groq)
1611    ///     .with_proxy(None)
1612    ///     .build()?;
1613    /// # Ok::<(), ai_lib::AiLibError>(())
1614    /// ```
1615    pub fn with_proxy(mut self, proxy_url: Option<&str>) -> Self {
1616        self.proxy_url = proxy_url.map(|s| s.to_string());
1617        self
1618    }
1619
1620    /// Explicitly disable proxy usage
1621    ///
1622    /// This method ensures that no proxy will be used, regardless of environment variables.
1623    ///
1624    /// # Returns
1625    /// * `Self` - Builder instance for method chaining
1626    ///
1627    /// # Example
1628    /// ```rust
1629    /// use ai_lib::{AiClientBuilder, Provider};
1630    ///
1631    /// let client = AiClientBuilder::new(Provider::Groq)
1632    ///     .without_proxy().build()?;
1633    /// # Ok::<(), ai_lib::AiLibError>(())
1634    /// ```
1635    pub fn without_proxy(mut self) -> Self {
1636        self.proxy_url = Some("".to_string());
1637        self
1638    }
1639
1640    /// Set custom timeout duration
1641    ///
1642    /// # Arguments
1643    /// * `timeout` - Custom timeout duration
1644    ///
1645    /// # Returns
1646    /// * `Self` - Builder instance for method chaining
1647    pub fn with_timeout(mut self, timeout: std::time::Duration) -> Self {
1648        self.timeout = Some(timeout);
1649        self
1650    }
1651
1652    /// Set connection pool configuration
1653    ///
1654    /// # Arguments
1655    /// * `max_idle` - Maximum idle connections per host
1656    /// * `idle_timeout` - Idle connection timeout duration
1657    ///
1658    /// # Returns
1659    /// * `Self` - Builder instance for method chaining
1660    pub fn with_pool_config(mut self, max_idle: usize, idle_timeout: std::time::Duration) -> Self {
1661        self.pool_max_idle = Some(max_idle);
1662        self.pool_idle_timeout = Some(idle_timeout);
1663        self
1664    }
1665
1666    /// Set custom metrics implementation
1667    ///
1668    /// # Arguments
1669    /// * `metrics` - Custom metrics implementation
1670    ///
1671    /// # Returns
1672    /// * `Self` - Builder instance for method chaining
1673    pub fn with_metrics(mut self, metrics: Arc<dyn Metrics>) -> Self {
1674        self.metrics = Some(metrics);
1675        self
1676    }
1677
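    /// Attach a custom interceptor pipeline (requires the `interceptors` feature).
    ///
    /// # Arguments
    /// * `pipeline` - Interceptor pipeline to install on the built client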
1678    #[cfg(feature = "interceptors")]
1679    pub fn with_interceptor_pipeline(
1680        mut self,
1681        pipeline: crate::interceptors::InterceptorPipeline,
1682    ) -> Self {
1683        self.interceptor_pipeline = Some(pipeline);
1684        self
1685    }
1686
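    /// Enable the library's default interceptor pipeline, built via
    /// `crate::interceptors::create_default_interceptors` (requires the `interceptors` feature).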
1687    #[cfg(feature = "interceptors")]
1688    pub fn enable_default_interceptors(mut self) -> Self {
1689        let p = crate::interceptors::create_default_interceptors();
1690        self.interceptor_pipeline = Some(p);
1691        self
1692    }
1693
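    /// Enable a minimal interceptor pipeline with the circuit breaker and rate
    /// limiter disabled (requires the `interceptors` feature).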
1694    #[cfg(feature = "interceptors")]
1695    pub fn enable_minimal_interceptors(mut self) -> Self {
1696        let p = crate::interceptors::default::DefaultInterceptorsBuilder::new()
1697            .enable_circuit_breaker(false)
1698            .enable_rate_limit(false)
1699            .build();
1700        self.interceptor_pipeline = Some(p);
1701        self
1702    }
1703
1704    /// Set default chat model for the client
1705    ///
1706    /// # Arguments
1707    /// * `model` - Default chat model name
1708    ///
1709    /// # Returns
1710    /// * `Self` - Builder instance for method chaining
1711    ///
1712    /// # Example
1713    /// ```rust
1714    /// use ai_lib::{AiClientBuilder, Provider};
1715    ///
1716    /// let client = AiClientBuilder::new(Provider::Groq)
1717    ///     .with_default_chat_model("llama-3.1-8b-instant")
1718    ///     .build()?;
1719    /// # Ok::<(), ai_lib::AiLibError>(())
1720    /// ```
1721    pub fn with_default_chat_model(mut self, model: &str) -> Self {
1722        self.default_chat_model = Some(model.to_string());
1723        self
1724    }
1725
1726    /// Set default multimodal model for the client
1727    ///
1728    /// # Arguments
1729    /// * `model` - Default multimodal model name
1730    ///
1731    /// # Returns
1732    /// * `Self` - Builder instance for method chaining
1733    ///
1734    /// # Example
1735    /// ```rust
1736    /// use ai_lib::{AiClientBuilder, Provider};
1737    ///
1738    /// let client = AiClientBuilder::new(Provider::Groq)
1739    ///     .with_default_multimodal_model("llama-3.2-11b-vision")
1740    ///     .build()?;
1741    /// # Ok::<(), ai_lib::AiLibError>(())
1742    /// ```
1743    pub fn with_default_multimodal_model(mut self, model: &str) -> Self {
1744        self.default_multimodal_model = Some(model.to_string());
1745        self
1746    }
1747
1748    /// Enable smart defaults for resilience features
1749    ///
1750    /// This method enables reasonable default configurations for circuit breaker,
1751    /// rate limiting, and error handling without requiring detailed configuration.
1752    ///
1753    /// # Returns
1754    /// * `Self` - Builder instance for method chaining
1755    ///
1756    /// # Example
1757    /// ```rust
1758    /// use ai_lib::{AiClientBuilder, Provider};
1759    ///
1760    /// let client = AiClientBuilder::new(Provider::Groq)
1761    ///     .with_smart_defaults()
1762    ///     .build()?;
1763    /// # Ok::<(), ai_lib::AiLibError>(())
1764    /// ```
1765    pub fn with_smart_defaults(mut self) -> Self {
1766        self.resilience_config = ResilienceConfig::smart_defaults();
1767        self
1768    }
1769
1770    /// Configure for production environment
1771    ///
1772    /// This method applies production-ready configurations for all resilience
1773    /// features with conservative settings for maximum reliability.
1774    ///
1775    /// # Returns
1776    /// * `Self` - Builder instance for method chaining
1777    ///
1778    /// # Example
1779    /// ```rust
1780    /// use ai_lib::{AiClientBuilder, Provider};
1781    ///
1782    /// let client = AiClientBuilder::new(Provider::Groq)
1783    ///     .for_production()
1784    ///     .build()?;
1785    /// # Ok::<(), ai_lib::AiLibError>(())
1786    /// ```
1787    pub fn for_production(mut self) -> Self {
1788        self.resilience_config = ResilienceConfig::production();
1789        self
1790    }
1791
1792    /// Configure for development environment
1793    ///
1794    /// This method applies development-friendly configurations with more
1795    /// lenient settings for easier debugging and testing.
1796    ///
1797    /// # Returns
1798    /// * `Self` - Builder instance for method chaining
1799    ///
1800    /// # Example
1801    /// ```rust
1802    /// use ai_lib::{AiClientBuilder, Provider};
1803    ///
1804    /// let client = AiClientBuilder::new(Provider::Groq)
1805    ///     .for_development()
1806    ///     .build()?;
1807    /// # Ok::<(), ai_lib::AiLibError>(())
1808    /// ```
1809    pub fn for_development(mut self) -> Self {
1810        self.resilience_config = ResilienceConfig::development();
1811        self
1812    }
1813
1814    /// Configure a simple max-concurrent-requests backpressure guard
1815    ///
1816    /// This provides a convenient way to set a global concurrency cap using a semaphore.
1817    /// It is equivalent to setting `ResilienceConfig.backpressure.max_concurrent_requests`.
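    ///
    /// Illustrative example (the concurrency cap of 8 is arbitrary):
    /// ```rust
    /// use ai_lib::{AiClientBuilder, Provider};
    ///
    /// let client = AiClientBuilder::new(Provider::Groq)
    ///     .with_max_concurrency(8)
    ///     .build()?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```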
1818    pub fn with_max_concurrency(mut self, max_concurrent_requests: usize) -> Self {
1819        let mut cfg = self.resilience_config.clone();
1820        cfg.backpressure = Some(crate::config::BackpressureConfig { max_concurrent_requests });
1821        self.resilience_config = cfg;
1822        self
1823    }
1824
1825    /// Set custom resilience configuration
1826    ///
1827    /// # Arguments
1828    /// * `config` - Custom resilience configuration
1829    ///
1830    /// # Returns
1831    /// * `Self` - Builder instance for method chaining
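    ///
    /// Illustrative sketch (marked `ignore`; the exact import path for `ResilienceConfig`
    /// depends on the crate's re-exports):
    /// ```rust,ignore
    /// use ai_lib::{AiClientBuilder, Provider};
    /// use ai_lib::config::ResilienceConfig;
    ///
    /// let client = AiClientBuilder::new(Provider::Groq)
    ///     .with_resilience_config(ResilienceConfig::smart_defaults())
    ///     .build()?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```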
1832    pub fn with_resilience_config(mut self, config: ResilienceConfig) -> Self {
1833        self.resilience_config = config;
1834        self
1835    }
1836
1837    #[cfg(feature = "routing_mvp")]
1838    /// Provide a `ModelArray` for client-side routing and fallback.
1839    pub fn with_routing_array(mut self, array: crate::provider::models::ModelArray) -> Self {
1840        self.routing_array = Some(array);
1841        self
1842    }
1843
1844    /// Build AiClient instance
1845    ///
1846    /// The build process applies configuration in the following priority order:
1847    /// 1. Explicitly set configuration (via with_* methods)
1848    /// 2. Environment variable configuration
1849    /// 3. Default configuration
1850    ///
1851    /// # Returns
1852    /// * `Result<AiClient, AiLibError>` - Returns client instance on success, error on failure
1853    pub fn build(self) -> Result<AiClient, AiLibError> {
1854        // 1. Determine base_url: explicit setting > environment variable > default
1855        let base_url = self.determine_base_url()?;
1856
1857        // 2. Determine proxy_url: explicit setting > environment variable
1858        let proxy_url = self.determine_proxy_url();
1859
1860        // 3. Determine timeout: explicit setting > default
1861        let timeout = self
1862            .timeout
1863            .unwrap_or_else(|| std::time::Duration::from_secs(30));
1864
1865        // 4. Create adapter
1866        let adapter: Box<dyn ChatApi> = if Self::is_config_driven_provider(self.provider) {
1867            // All config-driven providers share the same construction path
1868            // Create custom ProviderConfig (if needed)
1869            let config = self.create_custom_config(base_url)?;
1870            // Create custom HttpTransport (if needed)
1871            let transport = self.create_custom_transport(proxy_url.clone(), timeout)?;
1872            create_generic_adapter(config, transport)?
1873        } else {
1874            // Independent adapters - simple one-liners
1875            match self.provider {
1876                Provider::OpenAI => Box::new(OpenAiAdapter::new()?),
1877                Provider::Gemini => Box::new(GeminiAdapter::new()?),
1878                Provider::Mistral => Box::new(MistralAdapter::new()?),
1879                Provider::Cohere => Box::new(CohereAdapter::new()?),
1880                _ => unreachable!("All providers should be handled by now"),
1881            }
1882        };
1883
1884        // 5. Build backpressure controller if configured
1885        let bp_ctrl: Option<Arc<BackpressureController>> = self
1886            .resilience_config
1887            .backpressure
1888            .as_ref()
1889            .map(|cfg| Arc::new(BackpressureController::new(cfg.max_concurrent_requests)));
1890
1891        // 6. Create AiClient
1892        let client = AiClient {
1893            provider: self.provider,
1894            adapter,
1895            metrics: self.metrics.unwrap_or_else(|| Arc::new(NoopMetrics::new())),
1896            connection_options: None,
1897            custom_default_chat_model: self.default_chat_model,
1898            custom_default_multimodal_model: self.default_multimodal_model,
1899            backpressure: bp_ctrl,
1900            failover_providers: None,
1901            #[cfg(feature = "routing_mvp")]
1902            routing_array: self.routing_array,
1903            #[cfg(feature = "interceptors")]
1904            interceptor_pipeline: self.interceptor_pipeline,
1905        };
1906
1907        Ok(client)
1908    }
1909
1910    /// Determine base_url, priority: explicit setting > environment variable > default
1911    fn determine_base_url(&self) -> Result<String, AiLibError> {
1912        // 1. Explicitly set base_url
1913        if let Some(ref base_url) = self.base_url {
1914            return Ok(base_url.clone());
1915        }
1916
1917        // 2. base_url from environment variable
1918        let env_var_name = self.get_base_url_env_var_name();
1919        if let Ok(base_url) = std::env::var(&env_var_name) {
1920            return Ok(base_url);
1921        }
1922
1923        // 3. Use default configuration (only for config-driven providers)
1924        if Self::is_config_driven_provider(self.provider) {
1925            let default_config = self.get_default_provider_config()?;
1926            Ok(default_config.base_url)
1927        } else {
1928            // For independent providers, return a default base URL
1929            match self.provider {
1930                Provider::OpenAI => Ok("https://api.openai.com".to_string()),
1931                Provider::Gemini => Ok("https://generativelanguage.googleapis.com".to_string()),
1932                Provider::Mistral => Ok("https://api.mistral.ai".to_string()),
1933                Provider::Cohere => Ok("https://api.cohere.ai".to_string()),
1934                _ => Err(AiLibError::ConfigurationError(
1935                    "Unknown provider for base URL determination".to_string(),
1936                )),
1937            }
1938        }
1939    }
1940
1941    /// Determine proxy_url, priority: explicit setting > environment variable
1942    fn determine_proxy_url(&self) -> Option<String> {
1943        // 1. Explicitly set proxy_url
1944        if let Some(ref proxy_url) = self.proxy_url {
1945            // If proxy_url is empty string, it means explicitly no proxy
1946            if proxy_url.is_empty() {
1947                return None;
1948            }
1949            return Some(proxy_url.clone());
1950        }
1951
1952        // 2. AI_PROXY_URL from environment variable
1953        std::env::var("AI_PROXY_URL").ok()
1954    }
1955
1956    /// Get environment variable name for corresponding provider
1957    fn get_base_url_env_var_name(&self) -> String {
1958        match self.provider {
1959            Provider::Groq => "GROQ_BASE_URL".to_string(),
1960            Provider::XaiGrok => "GROK_BASE_URL".to_string(),
1961            Provider::Ollama => "OLLAMA_BASE_URL".to_string(),
1962            Provider::DeepSeek => "DEEPSEEK_BASE_URL".to_string(),
1963            Provider::Qwen => "DASHSCOPE_BASE_URL".to_string(),
1964            Provider::BaiduWenxin => "BAIDU_WENXIN_BASE_URL".to_string(),
1965            Provider::TencentHunyuan => "TENCENT_HUNYUAN_BASE_URL".to_string(),
1966            Provider::IflytekSpark => "IFLYTEK_BASE_URL".to_string(),
1967            Provider::Moonshot => "MOONSHOT_BASE_URL".to_string(),
1968            Provider::Anthropic => "ANTHROPIC_BASE_URL".to_string(),
1969            Provider::AzureOpenAI => "AZURE_OPENAI_BASE_URL".to_string(),
1970            Provider::HuggingFace => "HUGGINGFACE_BASE_URL".to_string(),
1971            Provider::TogetherAI => "TOGETHER_BASE_URL".to_string(),
1972            Provider::OpenRouter => "OPENROUTER_BASE_URL".to_string(),
1973            Provider::Replicate => "REPLICATE_BASE_URL".to_string(),
1974            Provider::ZhipuAI => "ZHIPU_BASE_URL".to_string(),
1975            Provider::MiniMax => "MINIMAX_BASE_URL".to_string(),
1976            Provider::Perplexity => "PERPLEXITY_BASE_URL".to_string(),
1977            Provider::AI21 => "AI21_BASE_URL".to_string(),
1978            // These providers don't support custom base_url
1979            Provider::OpenAI | Provider::Gemini | Provider::Mistral | Provider::Cohere => {
1980                "".to_string()
1981            }
1982        }
1983    }
1984
1985    /// Get default provider configuration
1986    fn get_default_provider_config(
1987        &self,
1988    ) -> Result<crate::provider::config::ProviderConfig, AiLibError> {
1989        self.provider.get_default_config()
1990    }
1991
1992    /// Create custom ProviderConfig
1993    fn create_custom_config(
1994        &self,
1995        base_url: String,
1996    ) -> Result<crate::provider::config::ProviderConfig, AiLibError> {
1997        let mut config = self.get_default_provider_config()?;
1998        config.base_url = base_url;
1999        Ok(config)
2000    }
2001
2002    /// Create custom HttpTransport
2003    fn create_custom_transport(
2004        &self,
2005        proxy_url: Option<String>,
2006        timeout: std::time::Duration,
2007    ) -> Result<Option<crate::transport::DynHttpTransportRef>, AiLibError> {
2008        // If no custom configuration, return None (use default transport)
2009        if proxy_url.is_none() && self.pool_max_idle.is_none() && self.pool_idle_timeout.is_none() {
2010            return Ok(None);
2011        }
2012
2013        // Create custom HttpTransportConfig
2014        let transport_config = crate::transport::HttpTransportConfig {
2015            timeout,
2016            proxy: proxy_url,
2017            pool_max_idle_per_host: self.pool_max_idle,
2018            pool_idle_timeout: self.pool_idle_timeout,
2019        };
2020
2021        // Create custom HttpTransport
2022        let transport = crate::transport::HttpTransport::new_with_config(transport_config)?;
2023        Ok(Some(transport.boxed()))
2024    }
2025}
2026
2027/// Controllable streaming response
2028struct ControlledStream {
2029    inner: Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>,
2030    cancel_rx: Option<oneshot::Receiver<()>>,
2031    // Hold a backpressure permit for the lifetime of the stream if present
2032    _bp_permit: Option<BackpressurePermit>,
2033}
2034
2035impl ControlledStream {
2036    fn new_with_bp(
2037        inner: Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>,
2038        cancel_rx: Option<oneshot::Receiver<()>>,
2039        bp_permit: Option<BackpressurePermit>,
2040    ) -> Self {
2041        Self { inner, cancel_rx, _bp_permit: bp_permit }
2042    }
2043}
2044
2045impl Stream for ControlledStream {
2046    type Item = Result<ChatCompletionChunk, AiLibError>;
2047
2048    fn poll_next(
2049        mut self: std::pin::Pin<&mut Self>,
2050        cx: &mut std::task::Context<'_>,
2051    ) -> std::task::Poll<Option<Self::Item>> {
2052        use futures::stream::StreamExt;
2053        use std::task::Poll;
2054
2055        // Check if cancelled
2056        if let Some(ref mut cancel_rx) = self.cancel_rx {
2057            match Future::poll(std::pin::Pin::new(cancel_rx), cx) {
2058                Poll::Ready(_) => {
2059                    self.cancel_rx = None;
2060                    return Poll::Ready(Some(Err(AiLibError::ProviderError(
2061                        "Stream cancelled".to_string(),
2062                    ))));
2063                }
2064                Poll::Pending => {}
2065            }
2066        }
2067
2068        // Poll inner stream
2069        self.inner.poll_next_unpin(cx)
2070    }
2071}