ai_lib/client.rs
1use crate::api::{ChatApi, ChatCompletionChunk};
2use crate::config::{ConnectionOptions, ResilienceConfig};
3use crate::metrics::{Metrics, NoopMetrics};
4use crate::provider::{
5 classification::ProviderClassification, AI21Adapter, CohereAdapter, GeminiAdapter, GenericAdapter,
6 MistralAdapter, OpenAiAdapter, PerplexityAdapter, ProviderConfigs,
7};
8use crate::types::{AiLibError, ChatCompletionRequest, ChatCompletionResponse};
9use futures::stream::Stream;
10use futures::Future;
11use std::sync::Arc;
12use tokio::sync::oneshot;
13use crate::rate_limiter::{BackpressureController, BackpressurePermit};
14
15// Note: Some variables are marked as `mut` because they are modified within conditional compilation
16// blocks (`#[cfg(feature = "routing_mvp")]`). The linter may not detect these modifications.
17
18/// Model configuration options for explicit model selection
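///
/// A minimal usage sketch (illustrative only: it assumes `ModelOptions` is re-exported at the
/// crate root alongside `AiClient`, and the model names are just examples):
///
/// ```rust,ignore
/// use ai_lib::{AiClient, ModelOptions, Provider};
///
/// # async fn example() -> Result<(), ai_lib::AiLibError> {
/// let options = ModelOptions::new()
///     .with_chat_model("llama-3.1-8b-instant")
///     .with_fallback_models(vec!["llama3-8b-8192"])
///     .with_auto_discovery(false);
///
/// let text = AiClient::quick_chat_text_with_options(Provider::Groq, "Hello", options).await?;
/// # Ok(())
/// # }
/// ```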
19#[derive(Debug, Clone)]
20pub struct ModelOptions {
21 pub chat_model: Option<String>,
22 pub multimodal_model: Option<String>,
23 pub fallback_models: Vec<String>,
24 pub auto_discovery: bool,
25}
26
27impl Default for ModelOptions {
28 fn default() -> Self {
29 Self {
30 chat_model: None,
31 multimodal_model: None,
32 fallback_models: Vec::new(),
33 auto_discovery: true,
34 }
35 }
36}
37
38impl ModelOptions {
39 /// Create default model options
40 pub fn new() -> Self {
41 Self::default()
42 }
43
44 /// Set chat model
45 pub fn with_chat_model(mut self, model: &str) -> Self {
46 self.chat_model = Some(model.to_string());
47 self
48 }
49
50 /// Set multimodal model
51 pub fn with_multimodal_model(mut self, model: &str) -> Self {
52 self.multimodal_model = Some(model.to_string());
53 self
54 }
55
56 /// Set fallback models
57 pub fn with_fallback_models(mut self, models: Vec<&str>) -> Self {
58 self.fallback_models = models.into_iter().map(|s| s.to_string()).collect();
59 self
60 }
61
62 /// Enable or disable auto discovery
63 pub fn with_auto_discovery(mut self, enabled: bool) -> Self {
64 self.auto_discovery = enabled;
65 self
66 }
67}
68
69/// Helper function to create GenericAdapter with optional custom transport
70fn create_generic_adapter(
71 config: crate::provider::config::ProviderConfig,
72 transport: Option<crate::transport::DynHttpTransportRef>,
73) -> Result<Box<dyn ChatApi>, AiLibError> {
74 if let Some(custom_transport) = transport {
75 Ok(Box::new(GenericAdapter::with_transport_ref(
76 config,
77 custom_transport,
78 )?))
79 } else {
80 Ok(Box::new(GenericAdapter::new(config)?))
81 }
82}
83
/// AI model provider enumeration for the unified AI client.
///
/// Config-driven providers share the generic adapter; independent providers use dedicated adapters.
87#[derive(Debug, Clone, Copy, PartialEq)]
88pub enum Provider {
89 // Config-driven providers
90 Groq,
91 XaiGrok,
92 Ollama,
93 DeepSeek,
94 Anthropic,
95 AzureOpenAI,
96 HuggingFace,
97 TogetherAI,
98 OpenRouter,
99 Replicate,
100 // Chinese providers (OpenAI-compatible or config-driven)
101 BaiduWenxin,
102 TencentHunyuan,
103 IflytekSpark,
104 Moonshot,
105 ZhipuAI,
106 MiniMax,
107 // Independent adapters
108 OpenAI,
109 Qwen,
110 Gemini,
111 Mistral,
112 Cohere,
113 Perplexity,
114 AI21,
115 // Bedrock removed (deferred)
116}
117
118impl Provider {
119 /// Get the provider's preferred default chat model.
120 /// These should mirror the values used inside `ProviderConfigs`.
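    ///
    /// For example, the Groq default documented in the match below:
    ///
    /// ```rust
    /// use ai_lib::Provider;
    ///
    /// assert_eq!(Provider::Groq.default_chat_model(), "llama-3.1-8b-instant");
    /// ```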
121 pub fn default_chat_model(&self) -> &'static str {
122 match self {
123 Provider::Groq => "llama-3.1-8b-instant",
124 Provider::XaiGrok => "grok-beta",
125 Provider::Ollama => "llama3-8b",
126 Provider::DeepSeek => "deepseek-chat",
127 Provider::Anthropic => "claude-3-5-sonnet-20241022",
128 Provider::AzureOpenAI => "gpt-35-turbo",
129 Provider::HuggingFace => "microsoft/DialoGPT-medium",
130 Provider::TogetherAI => "meta-llama/Llama-3-8b-chat-hf",
131 Provider::OpenRouter => "openai/gpt-3.5-turbo",
132 Provider::Replicate => "meta/llama-2-7b-chat",
133 Provider::BaiduWenxin => "ernie-3.5",
134 Provider::TencentHunyuan => "hunyuan-standard",
135 Provider::IflytekSpark => "spark-v3.0",
136 Provider::Moonshot => "moonshot-v1-8k",
137 Provider::ZhipuAI => "glm-4",
138 Provider::MiniMax => "abab6.5-chat",
139 Provider::OpenAI => "gpt-3.5-turbo",
140 Provider::Qwen => "qwen-turbo",
141 Provider::Gemini => "gemini-1.5-flash", // current v1beta-supported chat model
142 Provider::Mistral => "mistral-small", // generic default
143 Provider::Cohere => "command-r", // chat-capable model
144 Provider::Perplexity => "llama-3.1-sonar-small-128k-online",
145 Provider::AI21 => "j2-ultra",
146 }
147 }
148
149 /// Get the provider's preferred multimodal model (if any).
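    ///
    /// For example (values mirror the match below):
    ///
    /// ```rust
    /// use ai_lib::Provider;
    ///
    /// assert_eq!(Provider::OpenAI.default_multimodal_model(), Some("gpt-4o"));
    /// assert_eq!(Provider::Groq.default_multimodal_model(), None);
    /// ```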
150 pub fn default_multimodal_model(&self) -> Option<&'static str> {
151 match self {
152 Provider::OpenAI => Some("gpt-4o"),
153 Provider::AzureOpenAI => Some("gpt-4o"),
154 Provider::Anthropic => Some("claude-3-5-sonnet-20241022"),
155 Provider::Groq => None, // No multimodal model currently available
156 Provider::Gemini => Some("gemini-1.5-flash"),
157 Provider::Cohere => Some("command-r-plus"),
158 Provider::OpenRouter => Some("openai/gpt-4o"),
159 Provider::Replicate => Some("meta/llama-2-7b-chat"),
160 Provider::ZhipuAI => Some("glm-4v"),
161 Provider::MiniMax => Some("abab6.5-chat"),
162 Provider::Perplexity => Some("llama-3.1-sonar-small-128k-online"),
163 Provider::AI21 => Some("j2-ultra"),
164 // Others presently have no clearly documented multimodal endpoint or are not yet wired.
165 _ => None,
166 }
167 }
168}
169
170/// Unified AI client
171///
172/// Usage example:
173/// ```rust
174/// use ai_lib::{AiClient, Provider, ChatCompletionRequest, Message, Role};
175///
176/// #[tokio::main]
177/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
178/// // Switch model provider by changing Provider value
179/// let client = AiClient::new(Provider::Groq)?;
180///
181/// let request = ChatCompletionRequest::new(
182/// "test-model".to_string(),
183/// vec![Message {
184/// role: Role::User,
185/// content: ai_lib::types::common::Content::Text("Hello".to_string()),
186/// function_call: None,
187/// }],
188/// );
189///
190/// // Note: Set GROQ_API_KEY environment variable for actual API calls
191/// // Optional: Set AI_PROXY_URL environment variable to use proxy server
192/// // let response = client.chat_completion(request).await?;
193///
194/// println!("Client created successfully with provider: {:?}", client.current_provider());
195/// println!("Request prepared for model: {}", request.model);
196///
197/// Ok(())
198/// }
199/// ```
200///
201/// # Proxy Configuration
202///
203/// Configure proxy server by setting the `AI_PROXY_URL` environment variable:
204///
205/// ```bash
206/// export AI_PROXY_URL=http://proxy.example.com:8080
207/// ```
208///
209/// Supported proxy formats:
210/// - HTTP proxy: `http://proxy.example.com:8080`
211/// - HTTPS proxy: `https://proxy.example.com:8080`
212/// - With authentication: `http://user:pass@proxy.example.com:8080`
213pub struct AiClient {
214 provider: Provider,
215 adapter: Box<dyn ChatApi>,
216 metrics: Arc<dyn Metrics>,
217 connection_options: Option<ConnectionOptions>,
218 #[cfg(feature = "interceptors")]
219 interceptor_pipeline: Option<crate::interceptors::InterceptorPipeline>,
220 // Custom default models (override provider defaults)
221 custom_default_chat_model: Option<String>,
222 custom_default_multimodal_model: Option<String>,
223 // Optional backpressure controller
224 backpressure: Option<Arc<BackpressureController>>,
225 // Optional basic failover providers (OSS): if a request fails with a retryable error,
226 // we will try providers in this list in order.
227 failover_providers: Option<Vec<Provider>>,
228 #[cfg(feature = "routing_mvp")]
229 routing_array: Option<crate::provider::models::ModelArray>,
230}
231
232impl AiClient {
233 /// Get the effective default chat model for this client (honors custom override)
234 pub fn default_chat_model(&self) -> String {
235 self.custom_default_chat_model
236 .clone()
237 .unwrap_or_else(|| self.provider.default_chat_model().to_string())
238 }
239 /// Create a new AI client
240 ///
241 /// # Arguments
242 /// * `provider` - The AI model provider to use
243 ///
244 /// # Returns
245 /// * `Result<Self, AiLibError>` - Client instance on success, error on failure
246 ///
247 /// # Example
248 /// ```rust
249 /// use ai_lib::{AiClient, Provider};
250 ///
251 /// let client = AiClient::new(Provider::Groq)?;
252 /// # Ok::<(), ai_lib::AiLibError>(())
253 /// ```
254 pub fn new(provider: Provider) -> Result<Self, AiLibError> {
255 // Use the new builder to create client with automatic environment variable detection
256 let mut c = AiClientBuilder::new(provider).build()?;
257 c.connection_options = None;
258 Ok(c)
259 }
260
261 /// Configure a basic failover chain for automatic provider switching on failures.
262 ///
263 /// When a request fails with a retryable error (network, timeout, rate limit, 5xx),
264 /// the client will automatically attempt subsequent providers in the specified order.
265 ///
266 /// ## Execution Behavior
267 /// - **Error Types**: Only retryable errors trigger failover (network, timeout, rate limit, 5xx)
268 /// - **Order**: Providers are tried in the exact order specified in the vector
269 /// - **Skip Current**: The current provider is automatically skipped if it appears in the chain
270 /// - **State Preservation**: Routing selections and request modifications are preserved during failover
271 ///
272 /// ## Integration with Other Features
273 /// - **Routing**: When used with `with_routing_array()`, the selected model is preserved
274 /// across failover attempts. Failover providers will use the same model selection.
275 /// - **Interceptors**: Failover happens after interceptor processing, so interceptors
276 /// can modify requests before failover attempts.
277 /// - **Metrics**: Failover attempts are tracked with `failover.attempts`, `failover.success`,
278 /// and `failover.error` metrics.
279 ///
280 /// ## Examples
281 /// ```rust
282 /// use ai_lib::{AiClient, Provider};
283 ///
284 /// // Basic failover configuration
285 /// let client = AiClient::new(Provider::OpenAI)?
286 /// .with_failover(vec![Provider::Anthropic, Provider::Groq]);
287 ///
288 /// // Combined with routing (requires routing_mvp feature)
289 /// #[cfg(feature = "routing_mvp")]
290 /// {
291 /// let mut array = ai_lib::provider::models::ModelArray::new("production");
292 /// // ... configure array
293 /// let client = client
294 /// .with_routing_array(array)
295 /// .with_failover(vec![Provider::Anthropic, Provider::Groq]);
296 /// }
297 ///
298 /// // Empty vector disables failover
299 /// let client = client.with_failover(vec![]);
300 /// # Ok::<(), ai_lib::AiLibError>(())
301 /// ```
302 ///
303 /// ## Limitations (OSS)
304 /// This is a lightweight OSS feature. For advanced capabilities, consider ai-lib-pro:
305 /// - Weighted failover based on provider performance
306 /// - SLO-aware failover policies
307 /// - Cost-based failover decisions
308 /// - Advanced health checking and circuit breaking
309 ///
310 /// # Arguments
311 /// * `providers` - Ordered list of fallback providers. Empty vector disables failover.
312 ///
313 /// # Returns
314 /// * `Self` - Client instance with failover configuration
315 pub fn with_failover(mut self, providers: Vec<Provider>) -> Self {
316 self.failover_providers = if providers.is_empty() { None } else { Some(providers) };
317 self
318 }
319
320 #[cfg(feature = "routing_mvp")]
321 /// Set a routing model array to enable basic endpoint selection before requests.
322 pub fn with_routing_array(mut self, array: crate::provider::models::ModelArray) -> Self {
323 self.routing_array = Some(array);
324 self
325 }
326
327 // #[cfg(feature = "routing_mvp")]
328 // fn select_routed_model(&mut self, fallback: &str) -> String {
329 // if let Some(arr) = self.routing_array.as_mut() {
330 // if let Some(ep) = arr.select_endpoint() {
331 // return ep.model_name.clone();
332 // }
333 // }
334 // fallback.to_string()
335 // }
336
    /// Create a client with minimal explicit options (base_url / proxy / timeout / api_key).
    /// Not every provider supports every override; options a provider does not support are ignored gracefully.
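    ///
    /// A sketch of passing explicit options (illustrative only: it assumes `ConnectionOptions` is
    /// re-exported at the crate root and implements `Default`; unspecified fields keep their defaults):
    ///
    /// ```rust,ignore
    /// use ai_lib::{AiClient, ConnectionOptions, Provider};
    ///
    /// let opts = ConnectionOptions {
    ///     base_url: Some("https://custom.groq.com".to_string()),
    ///     timeout: Some(std::time::Duration::from_secs(30)),
    ///     ..Default::default()
    /// };
    /// let client = AiClient::with_options(Provider::Groq, opts)?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```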
339 pub fn with_options(provider: Provider, opts: ConnectionOptions) -> Result<Self, AiLibError> {
340 let config_driven = provider.is_config_driven();
341 let need_builder = config_driven
342 && (opts.base_url.is_some()
343 || opts.proxy.is_some()
344 || opts.timeout.is_some()
345 || opts.disable_proxy);
346 if need_builder {
347 let mut b = AiClient::builder(provider);
348 if let Some(ref base) = opts.base_url {
349 b = b.with_base_url(base);
350 }
351 if opts.disable_proxy {
352 b = b.without_proxy();
353 } else if let Some(ref proxy) = opts.proxy {
354 if proxy.is_empty() {
355 b = b.without_proxy();
356 } else {
357 b = b.with_proxy(Some(proxy));
358 }
359 }
360 if let Some(t) = opts.timeout {
361 b = b.with_timeout(t);
362 }
363 let mut client = b.build()?;
364 // If api_key override + generic provider path: re-wrap adapter using override
365 if opts.api_key.is_some() {
366 // Only applies to config-driven generic adapter providers
367 let new_adapter: Option<Box<dyn ChatApi>> = match provider {
368 Provider::Groq => Some(Box::new(GenericAdapter::new_with_api_key(
369 ProviderConfigs::groq(),
370 opts.api_key.clone(),
371 )?)),
372 Provider::XaiGrok => Some(Box::new(GenericAdapter::new_with_api_key(
373 ProviderConfigs::xai_grok(),
374 opts.api_key.clone(),
375 )?)),
376 Provider::Ollama => Some(Box::new(GenericAdapter::new_with_api_key(
377 ProviderConfigs::ollama(),
378 opts.api_key.clone(),
379 )?)),
380 Provider::DeepSeek => Some(Box::new(GenericAdapter::new_with_api_key(
381 ProviderConfigs::deepseek(),
382 opts.api_key.clone(),
383 )?)),
384 Provider::Qwen => Some(Box::new(GenericAdapter::new_with_api_key(
385 ProviderConfigs::qwen(),
386 opts.api_key.clone(),
387 )?)),
388 Provider::BaiduWenxin => Some(Box::new(GenericAdapter::new_with_api_key(
389 ProviderConfigs::baidu_wenxin(),
390 opts.api_key.clone(),
391 )?)),
392 Provider::TencentHunyuan => Some(Box::new(GenericAdapter::new_with_api_key(
393 ProviderConfigs::tencent_hunyuan(),
394 opts.api_key.clone(),
395 )?)),
396 Provider::IflytekSpark => Some(Box::new(GenericAdapter::new_with_api_key(
397 ProviderConfigs::iflytek_spark(),
398 opts.api_key.clone(),
399 )?)),
400 Provider::Moonshot => Some(Box::new(GenericAdapter::new_with_api_key(
401 ProviderConfigs::moonshot(),
402 opts.api_key.clone(),
403 )?)),
404 Provider::Anthropic => Some(Box::new(GenericAdapter::new_with_api_key(
405 ProviderConfigs::anthropic(),
406 opts.api_key.clone(),
407 )?)),
408 Provider::AzureOpenAI => Some(Box::new(GenericAdapter::new_with_api_key(
409 ProviderConfigs::azure_openai(),
410 opts.api_key.clone(),
411 )?)),
412 Provider::HuggingFace => Some(Box::new(GenericAdapter::new_with_api_key(
413 ProviderConfigs::huggingface(),
414 opts.api_key.clone(),
415 )?)),
416 Provider::TogetherAI => Some(Box::new(GenericAdapter::new_with_api_key(
417 ProviderConfigs::together_ai(),
418 opts.api_key.clone(),
419 )?)),
420 _ => None,
421 };
422 if let Some(a) = new_adapter {
423 client.adapter = a;
424 }
425 }
426 client.connection_options = Some(opts);
427 return Ok(client);
428 }
429
430 // Independent adapters: OpenAI / Gemini / Mistral / Cohere
431 if provider.is_independent() {
432 let adapter: Box<dyn ChatApi> = match provider {
433 Provider::OpenAI => {
434 if let Some(ref k) = opts.api_key {
435 let inner =
436 OpenAiAdapter::new_with_overrides(k.clone(), opts.base_url.clone())?;
437 Box::new(inner)
438 } else {
439 let inner = OpenAiAdapter::new()?;
440 Box::new(inner)
441 }
442 }
443 Provider::Gemini => {
444 if let Some(ref k) = opts.api_key {
445 let inner =
446 GeminiAdapter::new_with_overrides(k.clone(), opts.base_url.clone())?;
447 Box::new(inner)
448 } else {
449 let inner = GeminiAdapter::new()?;
450 Box::new(inner)
451 }
452 }
453 Provider::Mistral => {
454 if opts.api_key.is_some() || opts.base_url.is_some() {
455 let inner = MistralAdapter::new_with_overrides(
456 opts.api_key.clone(),
457 opts.base_url.clone(),
458 )?;
459 Box::new(inner)
460 } else {
461 let inner = MistralAdapter::new()?;
462 Box::new(inner)
463 }
464 }
465 Provider::Cohere => {
466 if let Some(ref k) = opts.api_key {
467 let inner =
468 CohereAdapter::new_with_overrides(k.clone(), opts.base_url.clone())?;
469 Box::new(inner)
470 } else {
471 let inner = CohereAdapter::new()?;
472 Box::new(inner)
473 }
474 }
            // Perplexity and AI21 have no override constructors wired here yet; fall back to their
            // environment-based constructors so an explicit-options call does not panic.
            Provider::Perplexity => Box::new(PerplexityAdapter::new()?),
            Provider::AI21 => Box::new(AI21Adapter::new()?),
            _ => unreachable!(),
476 };
477 return Ok(AiClient {
478 provider,
479 adapter,
480 metrics: Arc::new(NoopMetrics::new()),
481 connection_options: Some(opts),
482 custom_default_chat_model: None,
483 custom_default_multimodal_model: None,
484 backpressure: None,
485 failover_providers: None,
486 #[cfg(feature = "routing_mvp")]
487 routing_array: None,
488 #[cfg(feature = "interceptors")]
489 interceptor_pipeline: None,
490 });
491 }
492
493 // Simple config-driven without overrides
494 let mut client = AiClient::new(provider)?;
495 if let Some(ref k) = opts.api_key {
496 let override_adapter: Option<Box<dyn ChatApi>> = match provider {
497 Provider::Groq => Some(Box::new(GenericAdapter::new_with_api_key(
498 ProviderConfigs::groq(),
499 Some(k.clone()),
500 )?)),
501 Provider::XaiGrok => Some(Box::new(GenericAdapter::new_with_api_key(
502 ProviderConfigs::xai_grok(),
503 Some(k.clone()),
504 )?)),
505 Provider::Ollama => Some(Box::new(GenericAdapter::new_with_api_key(
506 ProviderConfigs::ollama(),
507 Some(k.clone()),
508 )?)),
509 Provider::DeepSeek => Some(Box::new(GenericAdapter::new_with_api_key(
510 ProviderConfigs::deepseek(),
511 Some(k.clone()),
512 )?)),
513 Provider::Qwen => Some(Box::new(GenericAdapter::new_with_api_key(
514 ProviderConfigs::qwen(),
515 Some(k.clone()),
516 )?)),
517 Provider::BaiduWenxin => Some(Box::new(GenericAdapter::new_with_api_key(
518 ProviderConfigs::baidu_wenxin(),
519 Some(k.clone()),
520 )?)),
521 Provider::TencentHunyuan => Some(Box::new(GenericAdapter::new_with_api_key(
522 ProviderConfigs::tencent_hunyuan(),
523 Some(k.clone()),
524 )?)),
525 Provider::IflytekSpark => Some(Box::new(GenericAdapter::new_with_api_key(
526 ProviderConfigs::iflytek_spark(),
527 Some(k.clone()),
528 )?)),
529 Provider::Moonshot => Some(Box::new(GenericAdapter::new_with_api_key(
530 ProviderConfigs::moonshot(),
531 Some(k.clone()),
532 )?)),
533 Provider::Anthropic => Some(Box::new(GenericAdapter::new_with_api_key(
534 ProviderConfigs::anthropic(),
535 Some(k.clone()),
536 )?)),
537 Provider::AzureOpenAI => Some(Box::new(GenericAdapter::new_with_api_key(
538 ProviderConfigs::azure_openai(),
539 Some(k.clone()),
540 )?)),
541 Provider::HuggingFace => Some(Box::new(GenericAdapter::new_with_api_key(
542 ProviderConfigs::huggingface(),
543 Some(k.clone()),
544 )?)),
545 Provider::TogetherAI => Some(Box::new(GenericAdapter::new_with_api_key(
546 ProviderConfigs::together_ai(),
547 Some(k.clone()),
548 )?)),
549 _ => None,
550 };
551 if let Some(a) = override_adapter {
552 client.adapter = a;
553 }
554 }
555 client.connection_options = Some(opts);
556 Ok(client)
557 }
558
559 pub fn connection_options(&self) -> Option<&ConnectionOptions> {
560 self.connection_options.as_ref()
561 }
562
563 /// Create a new AI client builder
564 ///
565 /// The builder pattern allows more flexible client configuration:
566 /// - Automatic environment variable detection
567 /// - Support for custom base_url and proxy
568 /// - Support for custom timeout and connection pool configuration
569 ///
570 /// # Arguments
571 /// * `provider` - The AI model provider to use
572 ///
573 /// # Returns
574 /// * `AiClientBuilder` - Builder instance
575 ///
576 /// # Example
577 /// ```rust
578 /// use ai_lib::{AiClient, Provider};
579 ///
580 /// // Simplest usage - automatic environment variable detection
581 /// let client = AiClient::builder(Provider::Groq).build()?;
582 ///
583 /// // Custom base_url and proxy
584 /// let client = AiClient::builder(Provider::Groq)
585 /// .with_base_url("https://custom.groq.com")
586 /// .with_proxy(Some("http://proxy.example.com:8080"))
587 /// .build()?;
588 /// # Ok::<(), ai_lib::AiLibError>(())
589 /// ```
590 pub fn builder(provider: Provider) -> AiClientBuilder {
591 AiClientBuilder::new(provider)
592 }
593
594 /// Create AiClient with injected metrics implementation
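    ///
    /// A small sketch using the built-in no-op metrics (assumes `NoopMetrics` is exposed under
    /// `ai_lib::metrics`; substitute any `Metrics` implementation of your own):
    ///
    /// ```rust,ignore
    /// use std::sync::Arc;
    /// use ai_lib::{AiClient, Provider};
    /// use ai_lib::metrics::NoopMetrics;
    ///
    /// let client = AiClient::new_with_metrics(Provider::Groq, Arc::new(NoopMetrics::new()))?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```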
595 pub fn new_with_metrics(
596 provider: Provider,
597 metrics: Arc<dyn Metrics>,
598 ) -> Result<Self, AiLibError> {
599 let adapter: Box<dyn ChatApi> = match provider {
600 Provider::Groq => Box::new(GenericAdapter::new(ProviderConfigs::groq())?),
601 Provider::XaiGrok => Box::new(GenericAdapter::new(ProviderConfigs::xai_grok())?),
602 Provider::Ollama => Box::new(GenericAdapter::new(ProviderConfigs::ollama())?),
603 Provider::DeepSeek => Box::new(GenericAdapter::new(ProviderConfigs::deepseek())?),
604 Provider::Qwen => Box::new(GenericAdapter::new(ProviderConfigs::qwen())?),
605 Provider::Anthropic => Box::new(GenericAdapter::new(ProviderConfigs::anthropic())?),
606 Provider::BaiduWenxin => {
607 Box::new(GenericAdapter::new(ProviderConfigs::baidu_wenxin())?)
608 }
609 Provider::TencentHunyuan => {
610 Box::new(GenericAdapter::new(ProviderConfigs::tencent_hunyuan())?)
611 }
612 Provider::IflytekSpark => {
613 Box::new(GenericAdapter::new(ProviderConfigs::iflytek_spark())?)
614 }
615 Provider::Moonshot => Box::new(GenericAdapter::new(ProviderConfigs::moonshot())?),
616 Provider::AzureOpenAI => {
617 Box::new(GenericAdapter::new(ProviderConfigs::azure_openai())?)
618 }
619 Provider::HuggingFace => Box::new(GenericAdapter::new(ProviderConfigs::huggingface())?),
620 Provider::TogetherAI => Box::new(GenericAdapter::new(ProviderConfigs::together_ai())?),
621 Provider::OpenRouter => Box::new(GenericAdapter::new(ProviderConfigs::openrouter())?),
622 Provider::Replicate => Box::new(GenericAdapter::new(ProviderConfigs::replicate())?),
623 Provider::ZhipuAI => Box::new(GenericAdapter::new(ProviderConfigs::zhipu_ai())?),
624 Provider::MiniMax => Box::new(GenericAdapter::new(ProviderConfigs::minimax())?),
625 Provider::OpenAI => Box::new(OpenAiAdapter::new()?),
626 Provider::Gemini => Box::new(GeminiAdapter::new()?),
627 Provider::Mistral => Box::new(MistralAdapter::new()?),
628 Provider::Cohere => Box::new(CohereAdapter::new()?),
629 Provider::Perplexity => Box::new(PerplexityAdapter::new()?),
630 Provider::AI21 => Box::new(AI21Adapter::new()?),
631 };
632
633 Ok(Self {
634 provider,
635 adapter,
636 metrics,
637 connection_options: None,
638 custom_default_chat_model: None,
639 custom_default_multimodal_model: None,
640 backpressure: None,
641 failover_providers: None,
642 #[cfg(feature = "routing_mvp")]
643 routing_array: None,
644 #[cfg(feature = "interceptors")]
645 interceptor_pipeline: None,
646 })
647 }
648
649 /// Set metrics implementation on client
650 pub fn with_metrics(mut self, metrics: Arc<dyn Metrics>) -> Self {
651 self.metrics = metrics;
652 self
653 }
654
655 /// Send chat completion request
656 ///
657 /// This method supports multiple routing and failover strategies that work together:
658 ///
659 /// ## Execution Flow
660 /// 1. **Routing (if enabled)**: If `request.model == "__route__"` and `routing_mvp` feature is enabled,
661 /// the client will select the best available model from the configured `ModelArray` based on
662 /// health checks and load balancing strategy.
663 /// 2. **Request Execution**: The request is sent to the current provider with the selected model.
664 /// 3. **Failover (if enabled)**: If the request fails with a retryable error (network, timeout,
665 /// rate limit, 5xx), the client will automatically try the configured failover providers
666 /// in order, preserving the routing selection.
667 ///
668 /// ## Feature Interaction
669 /// - **Routing + Failover**: When both are configured, routing selection is preserved during
670 /// failover attempts. The failover providers will use the same model selection.
671 /// - **Interceptors**: Applied after routing but before failover, allowing for request/response
672 /// modification and monitoring.
673 ///
674 /// ## Examples
675 /// ```rust
676 /// use ai_lib::{AiClient, Provider, ChatCompletionRequest, Message, Role};
677 /// use ai_lib::types::common::Content;
678 ///
679 /// // Basic usage with failover
680 /// let client = AiClient::new(Provider::OpenAI)?
681 /// .with_failover(vec![Provider::Anthropic, Provider::Groq]);
682 ///
683 /// // With routing (requires routing_mvp feature)
684 /// #[cfg(feature = "routing_mvp")]
685 /// {
686 /// let mut array = ai_lib::provider::models::ModelArray::new("production");
687 /// // ... configure array with endpoints
688 /// let client = client.with_routing_array(array);
689 /// }
690 ///
691 /// // Request with routing (will select best model from array)
692 /// let request = ChatCompletionRequest::new(
693 /// "__route__".to_string(), // Special sentinel for routing
694 /// vec![Message {
695 /// role: Role::User,
696 /// content: Content::Text("Hello".to_string()),
697 /// function_call: None,
698 /// }],
699 /// );
700 /// # Ok::<(), ai_lib::AiLibError>(())
701 /// ```
702 ///
703 /// # Arguments
704 /// * `request` - Chat completion request
705 ///
706 /// # Returns
707 /// * `Result<ChatCompletionResponse, AiLibError>` - Response on success, error on failure
708 #[allow(unused_mut)]
709 pub async fn chat_completion(
710 &self,
711 request: ChatCompletionRequest,
712 ) -> Result<ChatCompletionResponse, AiLibError> {
713 // Acquire backpressure permit if configured
714 let _bp_permit: Option<BackpressurePermit> = if let Some(ctrl) = &self.backpressure {
715 match ctrl.acquire_permit().await {
716 Ok(p) => Some(p),
717 Err(_) => {
718 return Err(AiLibError::RateLimitExceeded(
719 "Backpressure: no permits available".to_string(),
720 ))
721 }
722 }
723 } else {
724 None
725 };
726 // Step 1: Apply routing if configured and model is "__route__"
727 // Note: processed_request needs to be mut because we modify it in the routing_mvp block
728 let mut processed_request = request;
729 #[cfg(feature = "routing_mvp")]
730 {
731 if processed_request.model == "__route__" {
732 let _ = self.metrics.incr_counter("routing_mvp.request", 1).await;
                let mut chosen = self.provider.default_chat_model().to_string();
734 if let Some(arr) = &self.routing_array {
735 let mut arr_clone = arr.clone();
736 if let Some(ep) = arr_clone.select_endpoint() {
737 match crate::provider::utils::health_check(&ep.url).await {
738 Ok(()) => {
739 let _ = self.metrics.incr_counter("routing_mvp.selected", 1).await;
740 chosen = ep.model_name.clone();
741 }
742 Err(_) => {
743 let _ = self
744 .metrics
745 .incr_counter("routing_mvp.health_fail", 1)
746 .await;
747 chosen = self.provider.default_chat_model().to_string();
748 let _ = self
749 .metrics
750 .incr_counter("routing_mvp.fallback_default", 1)
751 .await;
752 }
753 }
754 } else {
755 let _ = self
756 .metrics
757 .incr_counter("routing_mvp.no_endpoint", 1)
758 .await;
759 }
760 } else {
761 let _ = self
762 .metrics
763 .incr_counter("routing_mvp.missing_array", 1)
764 .await;
765 }
766 processed_request.model = chosen;
767 }
768 }
769
770 // Step 2: Execute request with potential failover
771 let execute_request = || async {
772 #[cfg(feature = "interceptors")]
773 if let Some(p) = &self.interceptor_pipeline {
774 let ctx = crate::interceptors::RequestContext {
775 provider: format!("{:?}", self.provider).to_lowercase(),
776 model: processed_request.model.clone(),
777 };
778 return p
779 .execute(&ctx, &processed_request, || self.adapter.chat_completion(processed_request.clone()))
780 .await;
781 }
782 self.adapter.chat_completion(processed_request.clone()).await
783 };
784
785 match execute_request().await {
786 Ok(resp) => Ok(resp),
787 Err(e) => {
788 // Step 3: Try failover if configured and error is retryable
789 if let Some(chain) = &self.failover_providers {
790 if e.is_retryable() || matches!(e, AiLibError::TimeoutError(_)) {
791 for p in chain {
792 // Skip if same as current
793 if *p == self.provider { continue; }
794 let _ = self.metrics.incr_counter("failover.attempts", 1).await;
795
796 // Create failover client with same configuration
797 let failover_client = AiClient::new_with_metrics(*p, self.metrics.clone())?;
798
799 // Apply same routing logic to failover request if needed
800 let mut failover_request = processed_request.clone();
801 #[cfg(feature = "routing_mvp")]
802 {
803 // For failover, we don't re-apply routing since the model was already chosen
804 // or we use the failover provider's default model
805 if failover_request.model == "__route__" {
806 failover_request.model = p.default_chat_model().to_string();
807 }
808 }
809
810 match failover_client.adapter.chat_completion(failover_request).await {
811 Ok(r) => {
812 let _ = self.metrics.incr_counter("failover.success", 1).await;
813 return Ok(r);
814 }
815 Err(e2) => {
816 let _ = self.metrics.incr_counter("failover.error", 1).await;
817 // Continue to next provider on retryable
818 if !(e2.is_retryable() || matches!(e2, AiLibError::TimeoutError(_))) {
819 return Err(e2);
820 }
821 }
822 }
823 }
824 }
825 }
826 Err(e)
827 }
828 }
829 }
830
831 /// Streaming chat completion request
832 ///
833 /// This method provides the same routing and failover capabilities as `chat_completion()`,
834 /// but returns a streaming response for real-time processing.
835 ///
836 /// ## Execution Flow
837 /// 1. **Routing (if enabled)**: If `request.model == "__route__"` and `routing_mvp` feature is enabled,
838 /// the client will select the best available model from the configured `ModelArray` based on
839 /// health checks and load balancing strategy.
840 /// 2. **Stream Request Execution**: The streaming request is sent to the current provider with the selected model.
841 /// 3. **Failover (if enabled)**: If the stream request fails with a retryable error (network, timeout,
842 /// rate limit, 5xx), the client will automatically try the configured failover providers
843 /// in order, preserving the routing selection.
844 ///
845 /// ## Feature Interaction
846 /// - **Routing + Failover**: When both are configured, routing selection is preserved during
847 /// failover attempts. The failover providers will use the same model selection.
848 /// - **Interceptors**: Applied after routing but before failover, allowing for request/response
849 /// modification and monitoring.
850 /// - **Backpressure**: The configured backpressure controller is applied to the final stream.
851 ///
852 /// ## Examples
853 /// ```rust
854 /// use ai_lib::{AiClient, Provider, ChatCompletionRequest, Message, Role};
855 /// use ai_lib::types::common::Content;
856 /// use futures::stream::StreamExt;
857 ///
858 /// # async fn example() -> Result<(), ai_lib::AiLibError> {
859 /// // Basic usage with failover
860 /// let client = AiClient::new(Provider::OpenAI)?
861 /// .with_failover(vec![Provider::Anthropic, Provider::Groq]);
862 ///
863 /// // With routing (requires routing_mvp feature)
864 /// #[cfg(feature = "routing_mvp")]
865 /// {
866 /// let mut array = ai_lib::provider::models::ModelArray::new("production");
867 /// // ... configure array with endpoints
868 /// let client = client.with_routing_array(array);
869 /// }
870 ///
871 /// // Streaming request with routing (will select best model from array)
872 /// let request = ChatCompletionRequest::new(
873 /// "__route__".to_string(), // Special sentinel for routing
874 /// vec![Message {
875 /// role: Role::User,
876 /// content: Content::Text("Hello".to_string()),
877 /// function_call: None,
878 /// }],
879 /// );
880 ///
881 /// let mut stream = client.chat_completion_stream(request).await?;
882 /// while let Some(chunk) = stream.next().await {
883 /// match chunk {
884 /// Ok(chunk) => println!("Received: {:?}", chunk),
885 /// Err(e) => eprintln!("Stream error: {}", e),
886 /// }
887 /// }
888 /// # Ok(())
889 /// # }
890 /// ```
891 ///
892 /// # Arguments
893 /// * `request` - Chat completion request
894 ///
895 /// # Returns
896 /// * `Result<impl Stream<Item = Result<ChatCompletionChunk, AiLibError>>, AiLibError>` - Stream response on success
897 #[allow(unused_mut)]
898 pub async fn chat_completion_stream(
899 &self,
900 mut request: ChatCompletionRequest,
901 ) -> Result<
902 Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>,
903 AiLibError,
904 > {
905 request.stream = Some(true);
906 // Acquire backpressure permit if configured and hold it for the lifetime of the stream
907 let bp_permit: Option<BackpressurePermit> = if let Some(ctrl) = &self.backpressure {
908 match ctrl.acquire_permit().await {
909 Ok(p) => Some(p),
910 Err(_) => {
911 return Err(AiLibError::RateLimitExceeded(
912 "Backpressure: no permits available".to_string(),
913 ))
914 }
915 }
916 } else {
917 None
918 };
919 // Step 1: Apply routing if configured and model is "__route__"
920 // Note: processed_request needs to be mut because we modify it in the routing_mvp block
921 let mut processed_request = request;
922 #[cfg(feature = "routing_mvp")]
923 {
924 if processed_request.model == "__route__" {
925 let _ = self.metrics.incr_counter("routing_mvp.request", 1).await;
                let mut chosen = self.provider.default_chat_model().to_string();
927 if let Some(arr) = &self.routing_array {
928 let mut arr_clone = arr.clone();
929 if let Some(ep) = arr_clone.select_endpoint() {
930 match crate::provider::utils::health_check(&ep.url).await {
931 Ok(()) => {
932 let _ = self.metrics.incr_counter("routing_mvp.selected", 1).await;
933 chosen = ep.model_name.clone();
934 }
935 Err(_) => {
936 let _ = self
937 .metrics
938 .incr_counter("routing_mvp.health_fail", 1)
939 .await;
940 chosen = self.provider.default_chat_model().to_string();
941 let _ = self
942 .metrics
943 .incr_counter("routing_mvp.fallback_default", 1)
944 .await;
945 }
946 }
947 } else {
948 let _ = self
949 .metrics
950 .incr_counter("routing_mvp.no_endpoint", 1)
951 .await;
952 }
953 } else {
954 let _ = self
955 .metrics
956 .incr_counter("routing_mvp.missing_array", 1)
957 .await;
958 }
959 processed_request.model = chosen;
960 }
961 }
962
963 // Step 2: Execute stream request with potential failover
964 let execute_stream = || async {
965 #[cfg(feature = "interceptors")]
966 if let Some(p) = &self.interceptor_pipeline {
967 let ctx = crate::interceptors::RequestContext {
968 provider: format!("{:?}", self.provider).to_lowercase(),
969 model: processed_request.model.clone(),
970 };
                // Streaming responses are not routed through the pipeline; run the request-side
                // interceptor hooks, then delegate to the adapter (the clone keeps the request
                // available for the failover path below).
                for ic in &p.interceptors {
                    ic.on_request(&ctx, &processed_request).await;
                }
                return self.adapter.chat_completion_stream(processed_request.clone()).await;
976 }
977 self.adapter.chat_completion_stream(processed_request.clone()).await
978 };
979
980 match execute_stream().await {
981 Ok(inner) => {
982 let cs = ControlledStream::new_with_bp(inner, None, bp_permit);
983 Ok(Box::new(cs))
984 }
985 Err(e) => {
986 // Step 3: Try failover if configured and error is retryable
987 if let Some(chain) = &self.failover_providers {
988 if e.is_retryable() || matches!(e, AiLibError::TimeoutError(_)) {
989 for p in chain {
990 if *p == self.provider { continue; }
991 let _ = self.metrics.incr_counter("failover.attempts", 1).await;
992
993 // Create failover client with same configuration
994 let failover_client = AiClient::new_with_metrics(*p, self.metrics.clone())?;
995
996 // Apply same routing logic to failover request if needed
997 let mut failover_request = processed_request.clone();
998 #[cfg(feature = "routing_mvp")]
999 {
1000 // For failover, we don't re-apply routing since the model was already chosen
1001 // or we use the failover provider's default model
1002 if failover_request.model == "__route__" {
1003 failover_request.model = p.default_chat_model().to_string();
1004 }
1005 }
1006
1007 match failover_client.adapter.chat_completion_stream(failover_request).await {
1008 Ok(inner) => {
1009 let _ = self.metrics.incr_counter("failover.success", 1).await;
1010 let cs = ControlledStream::new_with_bp(inner, None, bp_permit);
1011 return Ok(Box::new(cs));
1012 }
1013 Err(e2) => {
1014 let _ = self.metrics.incr_counter("failover.error", 1).await;
1015 if !(e2.is_retryable() || matches!(e2, AiLibError::TimeoutError(_))) {
1016 return Err(e2);
1017 }
1018 }
1019 }
1020 }
1021 }
1022 }
1023 Err(e)
1024 }
1025 }
1026 }
1027
1028 /// Streaming chat completion request with cancel control
1029 ///
1030 /// # Arguments
1031 /// * `request` - Chat completion request
1032 ///
1033 /// # Returns
1034 /// * `Result<(impl Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin, CancelHandle), AiLibError>` - Returns streaming response and cancel handle on success
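    ///
    /// # Example
    /// A short sketch; the prompt is a placeholder and the call is not executed in doc tests:
    ///
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    /// use futures::stream::StreamExt;
    ///
    /// # async fn example() -> Result<(), ai_lib::AiLibError> {
    /// let client = AiClient::new(Provider::Groq)?;
    /// let request = client.build_simple_request("Tell me a story");
    ///
    /// let (mut stream, cancel) = client.chat_completion_stream_with_cancel(request).await?;
    ///
    /// // Read the first chunk, then cancel the remainder of the stream.
    /// if let Some(chunk) = stream.next().await {
    ///     println!("first chunk: {:?}", chunk?);
    /// }
    /// cancel.cancel();
    /// # Ok(())
    /// # }
    /// ```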
1035 pub async fn chat_completion_stream_with_cancel(
1036 &self,
1037 mut request: ChatCompletionRequest,
1038 ) -> Result<
1039 (
1040 Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>,
1041 CancelHandle,
1042 ),
1043 AiLibError,
1044 > {
1045 request.stream = Some(true);
1046 // Acquire backpressure permit if configured and hold it for the lifetime of the stream
1047 let bp_permit: Option<BackpressurePermit> = if let Some(ctrl) = &self.backpressure {
1048 match ctrl.acquire_permit().await {
1049 Ok(p) => Some(p),
1050 Err(_) => {
1051 return Err(AiLibError::RateLimitExceeded(
1052 "Backpressure: no permits available".to_string(),
1053 ))
1054 }
1055 }
1056 } else {
1057 None
1058 };
1059 let stream = self.adapter.chat_completion_stream(request).await?;
1060 let (cancel_tx, cancel_rx) = oneshot::channel();
1061 let cancel_handle = CancelHandle {
1062 sender: Some(cancel_tx),
1063 };
1064
1065 let controlled_stream = ControlledStream::new_with_bp(stream, Some(cancel_rx), bp_permit);
1066 Ok((Box::new(controlled_stream), cancel_handle))
1067 }
1068
1069 /// Batch chat completion requests
1070 ///
1071 /// # Arguments
1072 /// * `requests` - List of chat completion requests
1073 /// * `concurrency_limit` - Maximum concurrent request count (None means unlimited)
1074 ///
1075 /// # Returns
1076 /// * `Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError>` - Returns response results for all requests
1077 ///
1078 /// # Example
1079 /// ```rust
1080 /// use ai_lib::{AiClient, Provider, ChatCompletionRequest, Message, Role};
1081 /// use ai_lib::types::common::Content;
1082 ///
1083 /// #[tokio::main]
1084 /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
1085 /// let client = AiClient::new(Provider::Groq)?;
1086 ///
1087 /// let requests = vec![
1088 /// ChatCompletionRequest::new(
1089 /// "llama3-8b-8192".to_string(),
1090 /// vec![Message {
1091 /// role: Role::User,
1092 /// content: Content::Text("Hello".to_string()),
1093 /// function_call: None,
1094 /// }],
1095 /// ),
1096 /// ChatCompletionRequest::new(
1097 /// "llama3-8b-8192".to_string(),
1098 /// vec![Message {
1099 /// role: Role::User,
1100 /// content: Content::Text("How are you?".to_string()),
1101 /// function_call: None,
1102 /// }],
1103 /// ),
1104 /// ];
1105 ///
1106 /// // Limit concurrency to 5
1107 /// let responses = client.chat_completion_batch(requests, Some(5)).await?;
1108 ///
1109 /// for (i, response) in responses.iter().enumerate() {
1110 /// match response {
1111 /// Ok(resp) => println!("Request {}: {}", i, resp.choices[0].message.content.as_text()),
1112 /// Err(e) => println!("Request {} failed: {}", i, e),
1113 /// }
1114 /// }
1115 ///
1116 /// Ok(())
1117 /// }
1118 /// ```
1119 pub async fn chat_completion_batch(
1120 &self,
1121 requests: Vec<ChatCompletionRequest>,
1122 concurrency_limit: Option<usize>,
1123 ) -> Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError> {
1124 self.adapter
1125 .chat_completion_batch(requests, concurrency_limit)
1126 .await
1127 }
1128
1129 /// Smart batch processing: automatically choose processing strategy based on request count
1130 ///
1131 /// # Arguments
1132 /// * `requests` - List of chat completion requests
1133 ///
1134 /// # Returns
1135 /// * `Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError>` - Returns response results for all requests
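    ///
    /// # Example
    /// A brief sketch (not executed in doc tests):
    ///
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// # async fn example() -> Result<(), ai_lib::AiLibError> {
    /// let client = AiClient::new(Provider::Groq)?;
    /// let requests = vec![
    ///     client.build_simple_request("Hello"),
    ///     client.build_simple_request("How are you?"),
    /// ];
    /// // With only two requests no explicit concurrency limit is applied; larger batches are capped at 10.
    /// let responses = client.chat_completion_batch_smart(requests).await?;
    /// assert_eq!(responses.len(), 2);
    /// # Ok(())
    /// # }
    /// ```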
1136 pub async fn chat_completion_batch_smart(
1137 &self,
1138 requests: Vec<ChatCompletionRequest>,
1139 ) -> Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError> {
        // Small batches (<= 3 requests) run without an explicit concurrency limit; larger batches are capped at 10 concurrent requests
1141 let concurrency_limit = if requests.len() <= 3 { None } else { Some(10) };
1142 self.chat_completion_batch(requests, concurrency_limit)
1143 .await
1144 }
1145
1197 /// Get list of supported models
1198 ///
1199 /// # Returns
1200 /// * `Result<Vec<String>, AiLibError>` - Returns model list on success, error on failure
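    ///
    /// # Example
    /// Not executed in doc tests; listing models requires valid provider credentials at runtime:
    ///
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// # async fn example() -> Result<(), ai_lib::AiLibError> {
    /// let client = AiClient::new(Provider::Groq)?;
    /// let models = client.list_models().await?;
    /// println!("{} models available", models.len());
    /// # Ok(())
    /// # }
    /// ```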
1201 pub async fn list_models(&self) -> Result<Vec<String>, AiLibError> {
1202 self.adapter.list_models().await
1203 }
1204
1205 /// Switch AI model provider
1206 ///
1207 /// # Arguments
1208 /// * `provider` - New provider
1209 ///
1210 /// # Returns
1211 /// * `Result<(), AiLibError>` - Returns () on success, error on failure
1212 ///
1213 /// # Example
1214 /// ```rust
1215 /// use ai_lib::{AiClient, Provider};
1216 ///
1217 /// let mut client = AiClient::new(Provider::Groq)?;
1218 /// // Switch from Groq to Groq (demonstrating switch functionality)
1219 /// client.switch_provider(Provider::Groq)?;
1220 /// # Ok::<(), ai_lib::AiLibError>(())
1221 /// ```
1222 pub fn switch_provider(&mut self, provider: Provider) -> Result<(), AiLibError> {
1223 let new_adapter: Box<dyn ChatApi> = match provider {
1224 Provider::Groq => Box::new(GenericAdapter::new(ProviderConfigs::groq())?),
1225 Provider::XaiGrok => Box::new(GenericAdapter::new(ProviderConfigs::xai_grok())?),
1226 Provider::Ollama => Box::new(GenericAdapter::new(ProviderConfigs::ollama())?),
1227 Provider::DeepSeek => Box::new(GenericAdapter::new(ProviderConfigs::deepseek())?),
1228 Provider::Qwen => Box::new(GenericAdapter::new(ProviderConfigs::qwen())?),
1229 Provider::OpenAI => Box::new(OpenAiAdapter::new()?),
1230 Provider::Anthropic => Box::new(GenericAdapter::new(ProviderConfigs::anthropic())?),
1231 Provider::BaiduWenxin => {
1232 Box::new(GenericAdapter::new(ProviderConfigs::baidu_wenxin())?)
1233 }
1234 Provider::TencentHunyuan => {
1235 Box::new(GenericAdapter::new(ProviderConfigs::tencent_hunyuan())?)
1236 }
1237 Provider::IflytekSpark => {
1238 Box::new(GenericAdapter::new(ProviderConfigs::iflytek_spark())?)
1239 }
1240 Provider::Moonshot => Box::new(GenericAdapter::new(ProviderConfigs::moonshot())?),
1241 Provider::Gemini => Box::new(GeminiAdapter::new()?),
1242 Provider::AzureOpenAI => {
1243 Box::new(GenericAdapter::new(ProviderConfigs::azure_openai())?)
1244 }
1245 Provider::HuggingFace => Box::new(GenericAdapter::new(ProviderConfigs::huggingface())?),
1246 Provider::TogetherAI => Box::new(GenericAdapter::new(ProviderConfigs::together_ai())?),
1247 Provider::OpenRouter => Box::new(GenericAdapter::new(ProviderConfigs::openrouter())?),
1248 Provider::Replicate => Box::new(GenericAdapter::new(ProviderConfigs::replicate())?),
1249 Provider::ZhipuAI => Box::new(GenericAdapter::new(ProviderConfigs::zhipu_ai())?),
1250 Provider::MiniMax => Box::new(GenericAdapter::new(ProviderConfigs::minimax())?),
1251 Provider::Mistral => Box::new(MistralAdapter::new()?),
1252 Provider::Cohere => Box::new(CohereAdapter::new()?),
1253 Provider::Perplexity => Box::new(PerplexityAdapter::new()?),
1254 Provider::AI21 => Box::new(AI21Adapter::new()?),
1255 // Provider::Bedrock => Box::new(BedrockAdapter::new()?),
1256 };
1257
1258 self.provider = provider;
1259 self.adapter = new_adapter;
1260 Ok(())
1261 }
1262
1263 /// Get current provider
1264 pub fn current_provider(&self) -> Provider {
1265 self.provider
1266 }
1267
1268 /// Convenience helper: construct a request with the provider's default chat model.
1269 /// This does NOT send the request.
1270 /// Uses custom default model if set via AiClientBuilder, otherwise uses provider default.
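    ///
    /// The constructed request carries the effective default chat model:
    ///
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// let client = AiClient::new(Provider::Groq)?;
    /// let request = client.build_simple_request("Hello");
    /// assert_eq!(request.model, client.default_chat_model());
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```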
1271 pub fn build_simple_request<S: Into<String>>(&self, prompt: S) -> ChatCompletionRequest {
1272 let model = self
1273 .custom_default_chat_model
1274 .clone()
1275 .unwrap_or_else(|| self.provider.default_chat_model().to_string());
1276
1277 ChatCompletionRequest::new(
1278 model,
1279 vec![crate::types::Message {
1280 role: crate::types::Role::User,
1281 content: crate::types::common::Content::Text(prompt.into()),
1282 function_call: None,
1283 }],
1284 )
1285 }
1286
1287 /// Convenience helper: construct a request with an explicitly specified chat model.
1288 /// This does NOT send the request.
1289 pub fn build_simple_request_with_model<S: Into<String>>(
1290 &self,
1291 prompt: S,
1292 model: S,
1293 ) -> ChatCompletionRequest {
1294 ChatCompletionRequest::new(
1295 model.into(),
1296 vec![crate::types::Message {
1297 role: crate::types::Role::User,
1298 content: crate::types::common::Content::Text(prompt.into()),
1299 function_call: None,
1300 }],
1301 )
1302 }
1303
1304 /// Convenience helper: construct a request with the provider's default multimodal model.
1305 /// This does NOT send the request.
1306 /// Uses custom default model if set via AiClientBuilder, otherwise uses provider default.
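    ///
    /// Providers without a documented multimodal default return a configuration error:
    ///
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// let client = AiClient::new(Provider::Groq)?;
    /// // Groq currently exposes no default multimodal model, so this is an error;
    /// // providers such as OpenAI would default to their multimodal model instead.
    /// assert!(client.build_multimodal_request("Describe this image").is_err());
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```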
1307 pub fn build_multimodal_request<S: Into<String>>(
1308 &self,
1309 prompt: S,
1310 ) -> Result<ChatCompletionRequest, AiLibError> {
1311 let model = self
1312 .custom_default_multimodal_model
1313 .clone()
1314 .or_else(|| {
1315 self.provider
1316 .default_multimodal_model()
1317 .map(|s| s.to_string())
1318 })
1319 .ok_or_else(|| {
1320 AiLibError::ConfigurationError(format!(
1321 "No multimodal model available for provider {:?}",
1322 self.provider
1323 ))
1324 })?;
1325
1326 Ok(ChatCompletionRequest::new(
1327 model,
1328 vec![crate::types::Message {
1329 role: crate::types::Role::User,
1330 content: crate::types::common::Content::Text(prompt.into()),
1331 function_call: None,
1332 }],
1333 ))
1334 }
1335
1336 /// Convenience helper: construct a request with an explicitly specified multimodal model.
1337 /// This does NOT send the request.
1338 pub fn build_multimodal_request_with_model<S: Into<String>>(
1339 &self,
1340 prompt: S,
1341 model: S,
1342 ) -> ChatCompletionRequest {
1343 ChatCompletionRequest::new(
1344 model.into(),
1345 vec![crate::types::Message {
1346 role: crate::types::Role::User,
1347 content: crate::types::common::Content::Text(prompt.into()),
1348 function_call: None,
1349 }],
1350 )
1351 }
1352
1353 /// One-shot helper: create a client for `provider`, send a single user prompt using the
1354 /// default chat model, and return plain text content (first choice).
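    ///
    /// Not executed in doc tests; requires `GROQ_API_KEY` (and optionally `AI_PROXY_URL`) at runtime:
    ///
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// # async fn example() -> Result<(), ai_lib::AiLibError> {
    /// let text = AiClient::quick_chat_text(Provider::Groq, "Hello").await?;
    /// println!("{}", text);
    /// # Ok(())
    /// # }
    /// ```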
1355 pub async fn quick_chat_text<P: Into<String>>(
1356 provider: Provider,
1357 prompt: P,
1358 ) -> Result<String, AiLibError> {
1359 let client = AiClient::new(provider)?;
1360 let req = client.build_simple_request(prompt.into());
1361 let resp = client.chat_completion(req).await?;
1362 resp.first_text().map(|s| s.to_string())
1363 }
1364
1365 /// One-shot helper: create a client for `provider`, send a single user prompt using an
1366 /// explicitly specified chat model, and return plain text content (first choice).
1367 pub async fn quick_chat_text_with_model<P: Into<String>, M: Into<String>>(
1368 provider: Provider,
1369 prompt: P,
1370 model: M,
1371 ) -> Result<String, AiLibError> {
1372 let client = AiClient::new(provider)?;
1373 let req = client.build_simple_request_with_model(prompt.into(), model.into());
1374 let resp = client.chat_completion(req).await?;
1375 resp.first_text().map(|s| s.to_string())
1376 }
1377
1378 /// One-shot helper: create a client for `provider`, send a single user prompt using the
1379 /// default multimodal model, and return plain text content (first choice).
1380 pub async fn quick_multimodal_text<P: Into<String>>(
1381 provider: Provider,
1382 prompt: P,
1383 ) -> Result<String, AiLibError> {
1384 let client = AiClient::new(provider)?;
1385 let req = client.build_multimodal_request(prompt.into())?;
1386 let resp = client.chat_completion(req).await?;
1387 resp.first_text().map(|s| s.to_string())
1388 }
1389
1390 /// One-shot helper: create a client for `provider`, send a single user prompt using an
1391 /// explicitly specified multimodal model, and return plain text content (first choice).
1392 pub async fn quick_multimodal_text_with_model<P: Into<String>, M: Into<String>>(
1393 provider: Provider,
1394 prompt: P,
1395 model: M,
1396 ) -> Result<String, AiLibError> {
1397 let client = AiClient::new(provider)?;
1398 let req = client.build_multimodal_request_with_model(prompt.into(), model.into());
1399 let resp = client.chat_completion(req).await?;
1400 resp.first_text().map(|s| s.to_string())
1401 }
1402
1403 /// One-shot helper with model options: create a client for `provider`, send a single user prompt
1404 /// using specified model options, and return plain text content (first choice).
1405 pub async fn quick_chat_text_with_options<P: Into<String>>(
1406 provider: Provider,
1407 prompt: P,
1408 options: ModelOptions,
1409 ) -> Result<String, AiLibError> {
1410 let client = AiClient::new(provider)?;
1411
1412 // Determine which model to use based on options
1413 let model = if let Some(chat_model) = options.chat_model {
1414 chat_model
1415 } else {
1416 provider.default_chat_model().to_string()
1417 };
1418
1419 let req = client.build_simple_request_with_model(prompt.into(), model);
1420 let resp = client.chat_completion(req).await?;
1421 resp.first_text().map(|s| s.to_string())
1422 }
1423
1424 /// Upload a local file using provider's multipart endpoint and return a URL or file id.
1425 ///
1426 /// Behavior:
1427 /// - For config-driven providers, uses their configured `upload_endpoint` if available.
1428 /// - For OpenAI, posts to `{base_url}/files`.
1429 /// - Providers without a known upload endpoint return `UnsupportedFeature`.
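    ///
    /// A short sketch (the file path is a placeholder; not executed in doc tests):
    ///
    /// ```rust
    /// use ai_lib::{AiClient, Provider};
    ///
    /// # async fn example() -> Result<(), ai_lib::AiLibError> {
    /// let client = AiClient::new(Provider::OpenAI)?;
    /// let file_ref = client.upload_file("./assets/report.pdf").await?;
    /// println!("uploaded file reference: {}", file_ref);
    /// # Ok(())
    /// # }
    /// ```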
1430 pub async fn upload_file(&self, path: &str) -> Result<String, AiLibError> {
1431 // Determine base_url precedence: explicit connection_options > provider default
1432 let base_url = if let Some(opts) = &self.connection_options {
1433 if let Some(b) = &opts.base_url {
1434 b.clone()
1435 } else {
1436 self.provider_default_base_url()?
1437 }
1438 } else {
1439 self.provider_default_base_url()?
1440 };
1441
1442 // Determine upload endpoint
1443 let endpoint: Option<String> = if self.provider.is_config_driven() {
1444 // Use provider default config to discover upload endpoint
1445 let cfg = self.provider.get_default_config()?;
1446 cfg.upload_endpoint.clone()
1447 } else {
1448 match self.provider {
1449 Provider::OpenAI => Some("/files".to_string()),
1450 _ => None,
1451 }
1452 };
1453
1454 let endpoint = endpoint.ok_or_else(|| {
1455 AiLibError::UnsupportedFeature(format!(
1456 "Provider {:?} does not expose an upload endpoint in OSS",
1457 self.provider
1458 ))
1459 })?;
1460
1461 // Compose URL (avoid double slashes)
1462 let upload_url = if base_url.ends_with('/') {
1463 format!("{}{}", base_url.trim_end_matches('/'), endpoint)
1464 } else {
1465 format!("{}{}", base_url, endpoint)
1466 };
1467
1468 // Perform upload using unified transport helper (uses injected transport when None)
1469 crate::provider::utils::upload_file_with_transport(None, &upload_url, path, "file").await
1470 }
1471
1472 fn provider_default_base_url(&self) -> Result<String, AiLibError> {
1473 if self.provider.is_config_driven() {
1474 Ok(self.provider.get_default_config()?.base_url)
1475 } else {
1476 match self.provider {
1477 Provider::OpenAI => Ok("https://api.openai.com/v1".to_string()),
1478 Provider::Gemini => {
1479 Ok("https://generativelanguage.googleapis.com/v1beta".to_string())
1480 }
1481 Provider::Mistral => Ok("https://api.mistral.ai".to_string()),
1482 Provider::Cohere => Ok("https://api.cohere.ai".to_string()),
1483 _ => Err(AiLibError::ConfigurationError(
1484 "No default base URL for provider".to_string(),
1485 )),
1486 }
1487 }
1488 }
1489}
1490
1491/// Streaming response cancel handle
1492pub struct CancelHandle {
1493 sender: Option<oneshot::Sender<()>>,
1494}
1495
1496impl CancelHandle {
1497 /// Cancel streaming response
1498 pub fn cancel(mut self) {
1499 if let Some(sender) = self.sender.take() {
1500 let _ = sender.send(());
1501 }
1502 }
1503}
1504
1505/// AI client builder with progressive custom configuration
1506///
1507/// Usage examples:
1508/// ```rust
1509/// use ai_lib::{AiClientBuilder, Provider};
1510///
1511/// // Simplest usage - automatic environment variable detection
1512/// let client = AiClientBuilder::new(Provider::Groq).build()?;
1513///
1514/// // Custom base_url and proxy
1515/// let client = AiClientBuilder::new(Provider::Groq)
1516/// .with_base_url("https://custom.groq.com")
1517/// .with_proxy(Some("http://proxy.example.com:8080"))
1518/// .build()?;
1519///
1520/// // Full custom configuration
1521/// let client = AiClientBuilder::new(Provider::Groq)
1522/// .with_base_url("https://custom.groq.com")
1523/// .with_proxy(Some("http://proxy.example.com:8080"))
1524/// .with_timeout(std::time::Duration::from_secs(60))
1525/// .with_pool_config(32, std::time::Duration::from_secs(90))
1526/// .build()?;
1527/// # Ok::<(), ai_lib::AiLibError>(())
1528/// ```
1529pub struct AiClientBuilder {
1530 provider: Provider,
1531 base_url: Option<String>,
1532 proxy_url: Option<String>,
1533 timeout: Option<std::time::Duration>,
1534 pool_max_idle: Option<usize>,
1535 pool_idle_timeout: Option<std::time::Duration>,
1536 metrics: Option<Arc<dyn Metrics>>,
1537 // Model configuration options
1538 default_chat_model: Option<String>,
1539 default_multimodal_model: Option<String>,
1540 // Resilience configuration
1541 resilience_config: ResilienceConfig,
1542 #[cfg(feature = "routing_mvp")]
1543 routing_array: Option<crate::provider::models::ModelArray>,
1544 #[cfg(feature = "interceptors")]
1545 interceptor_pipeline: Option<crate::interceptors::InterceptorPipeline>,
1546}
1547
1548impl AiClientBuilder {
1549 /// Create a new builder instance
1550 ///
1551 /// # Arguments
1552 /// * `provider` - The AI model provider to use
1553 ///
1554 /// # Returns
1555 /// * `Self` - Builder instance
1556 pub fn new(provider: Provider) -> Self {
1557 Self {
1558 provider,
1559 base_url: None,
1560 proxy_url: None,
1561 timeout: None,
1562 pool_max_idle: None,
1563 pool_idle_timeout: None,
1564 metrics: None,
1565 default_chat_model: None,
1566 default_multimodal_model: None,
1567 resilience_config: ResilienceConfig::default(),
1568 #[cfg(feature = "routing_mvp")]
1569 routing_array: None,
1570 #[cfg(feature = "interceptors")]
1571 interceptor_pipeline: None,
1572 }
1573 }
1574
1575 /// Check if provider is config-driven (uses GenericAdapter)
1576 fn is_config_driven_provider(provider: Provider) -> bool {
1577 provider.is_config_driven()
1578 }
1579
1580 /// Set custom base URL
1581 ///
1582 /// # Arguments
1583 /// * `base_url` - Custom base URL
1584 ///
1585 /// # Returns
1586 /// * `Self` - Builder instance for method chaining
1587 pub fn with_base_url(mut self, base_url: &str) -> Self {
1588 self.base_url = Some(base_url.to_string());
1589 self
1590 }
1591
1592 /// Set custom proxy URL
1593 ///
1594 /// # Arguments
1595 /// * `proxy_url` - Custom proxy URL, or None to use AI_PROXY_URL environment variable
1596 ///
1597 /// # Returns
1598 /// * `Self` - Builder instance for method chaining
1599 ///
1600 /// # Examples
1601 /// ```rust
1602 /// use ai_lib::{AiClientBuilder, Provider};
1603 ///
1604 /// // Use specific proxy URL
1605 /// let client = AiClientBuilder::new(Provider::Groq)
1606 /// .with_proxy(Some("http://proxy.example.com:8080"))
1607 /// .build()?;
1608 ///
1609 /// // Use AI_PROXY_URL environment variable
1610 /// let client = AiClientBuilder::new(Provider::Groq)
1611 /// .with_proxy(None)
1612 /// .build()?;
1613 /// # Ok::<(), ai_lib::AiLibError>(())
1614 /// ```
1615 pub fn with_proxy(mut self, proxy_url: Option<&str>) -> Self {
1616 self.proxy_url = proxy_url.map(|s| s.to_string());
1617 self
1618 }
1619
1620 /// Explicitly disable proxy usage
1621 ///
1622 /// This method ensures that no proxy will be used, regardless of environment variables.
1623 ///
1624 /// # Returns
1625 /// * `Self` - Builder instance for method chaining
1626 ///
1627 /// # Example
1628 /// ```rust
1629 /// use ai_lib::{AiClientBuilder, Provider};
1630 ///
    /// let client = AiClientBuilder::new(Provider::Groq)
    ///     .without_proxy()
    ///     .build()?;
1633 /// # Ok::<(), ai_lib::AiLibError>(())
1634 /// ```
1635 pub fn without_proxy(mut self) -> Self {
1636 self.proxy_url = Some("".to_string());
1637 self
1638 }
1639
1640 /// Set custom timeout duration
1641 ///
1642 /// # Arguments
1643 /// * `timeout` - Custom timeout duration
1644 ///
1645 /// # Returns
1646 /// * `Self` - Builder instance for method chaining
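    ///
    /// # Example
    /// A minimal sketch; the 60-second value is illustrative:
    /// ```rust
    /// use ai_lib::{AiClientBuilder, Provider};
    ///
    /// let client = AiClientBuilder::new(Provider::Groq)
    ///     .with_timeout(std::time::Duration::from_secs(60))
    ///     .build()?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```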
1647 pub fn with_timeout(mut self, timeout: std::time::Duration) -> Self {
1648 self.timeout = Some(timeout);
1649 self
1650 }
1651
1652 /// Set connection pool configuration
1653 ///
1654 /// # Arguments
1655 /// * `max_idle` - Maximum idle connections per host
1656 /// * `idle_timeout` - Idle connection timeout duration
1657 ///
1658 /// # Returns
1659 /// * `Self` - Builder instance for method chaining
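    ///
    /// # Example
    /// A minimal sketch; the pool sizing values are illustrative:
    /// ```rust
    /// use ai_lib::{AiClientBuilder, Provider};
    ///
    /// let client = AiClientBuilder::new(Provider::Groq)
    ///     .with_pool_config(32, std::time::Duration::from_secs(90))
    ///     .build()?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```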
1660 pub fn with_pool_config(mut self, max_idle: usize, idle_timeout: std::time::Duration) -> Self {
1661 self.pool_max_idle = Some(max_idle);
1662 self.pool_idle_timeout = Some(idle_timeout);
1663 self
1664 }
1665
1666 /// Set custom metrics implementation
1667 ///
1668 /// # Arguments
1669 /// * `metrics` - Custom metrics implementation
1670 ///
1671 /// # Returns
1672 /// * `Self` - Builder instance for method chaining
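    ///
    /// # Example
    /// A minimal sketch; it assumes `NoopMetrics` is re-exported at `ai_lib::metrics`
    /// (substitute your own `Metrics` implementation):
    /// ```rust,ignore
    /// use std::sync::Arc;
    /// use ai_lib::{AiClientBuilder, Provider};
    ///
    /// let client = AiClientBuilder::new(Provider::Groq)
    ///     .with_metrics(Arc::new(ai_lib::metrics::NoopMetrics::new()))
    ///     .build()?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```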
1673 pub fn with_metrics(mut self, metrics: Arc<dyn Metrics>) -> Self {
1674 self.metrics = Some(metrics);
1675 self
1676 }
1677
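    /// Set a custom interceptor pipeline (requires the `interceptors` feature)
    ///
    /// # Arguments
    /// * `pipeline` - Interceptor pipeline to attach to the built client
    ///
    /// # Returns
    /// * `Self` - Builder instance for method chaining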
1678 #[cfg(feature = "interceptors")]
1679 pub fn with_interceptor_pipeline(
1680 mut self,
1681 pipeline: crate::interceptors::InterceptorPipeline,
1682 ) -> Self {
1683 self.interceptor_pipeline = Some(pipeline);
1684 self
1685 }
1686
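    /// Enable the default interceptor set (requires the `interceptors` feature)
    ///
    /// Installs the pipeline produced by `crate::interceptors::create_default_interceptors()`.
    ///
    /// # Returns
    /// * `Self` - Builder instance for method chaining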
1687 #[cfg(feature = "interceptors")]
1688 pub fn enable_default_interceptors(mut self) -> Self {
1689 let p = crate::interceptors::create_default_interceptors();
1690 self.interceptor_pipeline = Some(p);
1691 self
1692 }
1693
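    /// Enable a minimal interceptor set (requires the `interceptors` feature)
    ///
    /// Builds the default pipeline with the circuit breaker and rate limiter disabled.
    ///
    /// # Returns
    /// * `Self` - Builder instance for method chaining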
1694 #[cfg(feature = "interceptors")]
1695 pub fn enable_minimal_interceptors(mut self) -> Self {
1696 let p = crate::interceptors::default::DefaultInterceptorsBuilder::new()
1697 .enable_circuit_breaker(false)
1698 .enable_rate_limit(false)
1699 .build();
1700 self.interceptor_pipeline = Some(p);
1701 self
1702 }
1703
1704 /// Set default chat model for the client
1705 ///
1706 /// # Arguments
1707 /// * `model` - Default chat model name
1708 ///
1709 /// # Returns
1710 /// * `Self` - Builder instance for method chaining
1711 ///
1712 /// # Example
1713 /// ```rust
1714 /// use ai_lib::{AiClientBuilder, Provider};
1715 ///
1716 /// let client = AiClientBuilder::new(Provider::Groq)
1717 /// .with_default_chat_model("llama-3.1-8b-instant")
1718 /// .build()?;
1719 /// # Ok::<(), ai_lib::AiLibError>(())
1720 /// ```
1721 pub fn with_default_chat_model(mut self, model: &str) -> Self {
1722 self.default_chat_model = Some(model.to_string());
1723 self
1724 }
1725
1726 /// Set default multimodal model for the client
1727 ///
1728 /// # Arguments
1729 /// * `model` - Default multimodal model name
1730 ///
1731 /// # Returns
1732 /// * `Self` - Builder instance for method chaining
1733 ///
1734 /// # Example
1735 /// ```rust
1736 /// use ai_lib::{AiClientBuilder, Provider};
1737 ///
1738 /// let client = AiClientBuilder::new(Provider::Groq)
1739 /// .with_default_multimodal_model("llama-3.2-11b-vision")
1740 /// .build()?;
1741 /// # Ok::<(), ai_lib::AiLibError>(())
1742 /// ```
1743 pub fn with_default_multimodal_model(mut self, model: &str) -> Self {
1744 self.default_multimodal_model = Some(model.to_string());
1745 self
1746 }
1747
1748 /// Enable smart defaults for resilience features
1749 ///
1750 /// This method enables reasonable default configurations for circuit breaker,
1751 /// rate limiting, and error handling without requiring detailed configuration.
1752 ///
1753 /// # Returns
1754 /// * `Self` - Builder instance for method chaining
1755 ///
1756 /// # Example
1757 /// ```rust
1758 /// use ai_lib::{AiClientBuilder, Provider};
1759 ///
1760 /// let client = AiClientBuilder::new(Provider::Groq)
1761 /// .with_smart_defaults()
1762 /// .build()?;
1763 /// # Ok::<(), ai_lib::AiLibError>(())
1764 /// ```
1765 pub fn with_smart_defaults(mut self) -> Self {
1766 self.resilience_config = ResilienceConfig::smart_defaults();
1767 self
1768 }
1769
1770 /// Configure for production environment
1771 ///
1772 /// This method applies production-ready configurations for all resilience
1773 /// features with conservative settings for maximum reliability.
1774 ///
1775 /// # Returns
1776 /// * `Self` - Builder instance for method chaining
1777 ///
1778 /// # Example
1779 /// ```rust
1780 /// use ai_lib::{AiClientBuilder, Provider};
1781 ///
1782 /// let client = AiClientBuilder::new(Provider::Groq)
1783 /// .for_production()
1784 /// .build()?;
1785 /// # Ok::<(), ai_lib::AiLibError>(())
1786 /// ```
1787 pub fn for_production(mut self) -> Self {
1788 self.resilience_config = ResilienceConfig::production();
1789 self
1790 }
1791
1792 /// Configure for development environment
1793 ///
1794 /// This method applies development-friendly configurations with more
1795 /// lenient settings for easier debugging and testing.
1796 ///
1797 /// # Returns
1798 /// * `Self` - Builder instance for method chaining
1799 ///
1800 /// # Example
1801 /// ```rust
1802 /// use ai_lib::{AiClientBuilder, Provider};
1803 ///
1804 /// let client = AiClientBuilder::new(Provider::Groq)
1805 /// .for_development()
1806 /// .build()?;
1807 /// # Ok::<(), ai_lib::AiLibError>(())
1808 /// ```
1809 pub fn for_development(mut self) -> Self {
1810 self.resilience_config = ResilienceConfig::development();
1811 self
1812 }
1813
    /// Configure a simple maximum-concurrent-requests backpressure guard
    ///
    /// This provides a convenient way to set a global concurrency cap using a semaphore.
    /// It is equivalent to setting `ResilienceConfig.backpressure.max_concurrent_requests`.
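    ///
    /// # Example
    /// A minimal sketch; the cap of 16 concurrent requests is illustrative:
    /// ```rust
    /// use ai_lib::{AiClientBuilder, Provider};
    ///
    /// let client = AiClientBuilder::new(Provider::Groq)
    ///     .with_max_concurrency(16)
    ///     .build()?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```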
1818 pub fn with_max_concurrency(mut self, max_concurrent_requests: usize) -> Self {
1819 let mut cfg = self.resilience_config.clone();
1820 cfg.backpressure = Some(crate::config::BackpressureConfig { max_concurrent_requests });
1821 self.resilience_config = cfg;
1822 self
1823 }
1824
1825 /// Set custom resilience configuration
1826 ///
1827 /// # Arguments
1828 /// * `config` - Custom resilience configuration
1829 ///
1830 /// # Returns
1831 /// * `Self` - Builder instance for method chaining
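    ///
    /// # Example
    /// A minimal sketch; it assumes `ResilienceConfig` is re-exported from the crate root:
    /// ```rust,ignore
    /// use ai_lib::{AiClientBuilder, Provider, ResilienceConfig};
    ///
    /// let client = AiClientBuilder::new(Provider::Groq)
    ///     .with_resilience_config(ResilienceConfig::production())
    ///     .build()?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```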
1832 pub fn with_resilience_config(mut self, config: ResilienceConfig) -> Self {
1833 self.resilience_config = config;
1834 self
1835 }
1836
1837 #[cfg(feature = "routing_mvp")]
1838 /// Provide a `ModelArray` for client-side routing and fallback.
1839 pub fn with_routing_array(mut self, array: crate::provider::models::ModelArray) -> Self {
1840 self.routing_array = Some(array);
1841 self
1842 }
1843
1844 /// Build AiClient instance
1845 ///
1846 /// The build process applies configuration in the following priority order:
1847 /// 1. Explicitly set configuration (via with_* methods)
1848 /// 2. Environment variable configuration
1849 /// 3. Default configuration
1850 ///
1851 /// # Returns
1852 /// * `Result<AiClient, AiLibError>` - Returns client instance on success, error on failure
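    ///
    /// # Example
    /// A minimal sketch showing that explicit settings take precedence over
    /// environment variables such as `GROQ_BASE_URL` and `AI_PROXY_URL`:
    /// ```rust
    /// use ai_lib::{AiClientBuilder, Provider};
    ///
    /// let client = AiClientBuilder::new(Provider::Groq)
    ///     .with_base_url("https://custom.groq.com")
    ///     .with_timeout(std::time::Duration::from_secs(45))
    ///     .build()?;
    /// # Ok::<(), ai_lib::AiLibError>(())
    /// ```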
1853 pub fn build(self) -> Result<AiClient, AiLibError> {
1854 // 1. Determine base_url: explicit setting > environment variable > default
1855 let base_url = self.determine_base_url()?;
1856
1857 // 2. Determine proxy_url: explicit setting > environment variable
1858 let proxy_url = self.determine_proxy_url();
1859
1860 // 3. Determine timeout: explicit setting > default
1861 let timeout = self
1862 .timeout
1863 .unwrap_or_else(|| std::time::Duration::from_secs(30));
1864
1865 // 4. Create adapter
1866 let adapter: Box<dyn ChatApi> = if Self::is_config_driven_provider(self.provider) {
1867 // All config-driven providers use the same logic - much cleaner!
1868 // Create custom ProviderConfig (if needed)
1869 let config = self.create_custom_config(base_url)?;
1870 // Create custom HttpTransport (if needed)
1871 let transport = self.create_custom_transport(proxy_url.clone(), timeout)?;
1872 create_generic_adapter(config, transport)?
1873 } else {
1874 // Independent adapters - simple one-liners
1875 match self.provider {
1876 Provider::OpenAI => Box::new(OpenAiAdapter::new()?),
1877 Provider::Gemini => Box::new(GeminiAdapter::new()?),
1878 Provider::Mistral => Box::new(MistralAdapter::new()?),
1879 Provider::Cohere => Box::new(CohereAdapter::new()?),
1880 _ => unreachable!("All providers should be handled by now"),
1881 }
1882 };
1883
1884 // 5. Build backpressure controller if configured
1885 let bp_ctrl: Option<Arc<BackpressureController>> = self
1886 .resilience_config
1887 .backpressure
1888 .as_ref()
1889 .map(|cfg| Arc::new(BackpressureController::new(cfg.max_concurrent_requests)));
1890
1891 // 6. Create AiClient
1892 let client = AiClient {
1893 provider: self.provider,
1894 adapter,
1895 metrics: self.metrics.unwrap_or_else(|| Arc::new(NoopMetrics::new())),
1896 connection_options: None,
1897 custom_default_chat_model: self.default_chat_model,
1898 custom_default_multimodal_model: self.default_multimodal_model,
1899 backpressure: bp_ctrl,
1900 failover_providers: None,
1901 #[cfg(feature = "routing_mvp")]
1902 routing_array: self.routing_array,
1903 #[cfg(feature = "interceptors")]
1904 interceptor_pipeline: self.interceptor_pipeline,
1905 };
1906
1907 Ok(client)
1908 }
1909
1910 /// Determine base_url, priority: explicit setting > environment variable > default
1911 fn determine_base_url(&self) -> Result<String, AiLibError> {
1912 // 1. Explicitly set base_url
1913 if let Some(ref base_url) = self.base_url {
1914 return Ok(base_url.clone());
1915 }
1916
1917 // 2. base_url from environment variable
1918 let env_var_name = self.get_base_url_env_var_name();
1919 if let Ok(base_url) = std::env::var(&env_var_name) {
1920 return Ok(base_url);
1921 }
1922
1923 // 3. Use default configuration (only for config-driven providers)
1924 if Self::is_config_driven_provider(self.provider) {
1925 let default_config = self.get_default_provider_config()?;
1926 Ok(default_config.base_url)
1927 } else {
1928 // For independent providers, return a default base URL
1929 match self.provider {
1930 Provider::OpenAI => Ok("https://api.openai.com".to_string()),
1931 Provider::Gemini => Ok("https://generativelanguage.googleapis.com".to_string()),
1932 Provider::Mistral => Ok("https://api.mistral.ai".to_string()),
1933 Provider::Cohere => Ok("https://api.cohere.ai".to_string()),
1934 _ => Err(AiLibError::ConfigurationError(
1935 "Unknown provider for base URL determination".to_string(),
1936 )),
1937 }
1938 }
1939 }
1940
1941 /// Determine proxy_url, priority: explicit setting > environment variable
1942 fn determine_proxy_url(&self) -> Option<String> {
1943 // 1. Explicitly set proxy_url
1944 if let Some(ref proxy_url) = self.proxy_url {
            // An empty proxy_url means the proxy was explicitly disabled (see `without_proxy`)
1946 if proxy_url.is_empty() {
1947 return None;
1948 }
1949 return Some(proxy_url.clone());
1950 }
1951
1952 // 2. AI_PROXY_URL from environment variable
1953 std::env::var("AI_PROXY_URL").ok()
1954 }
1955
    /// Get the base-URL environment variable name for the current provider
1957 fn get_base_url_env_var_name(&self) -> String {
1958 match self.provider {
1959 Provider::Groq => "GROQ_BASE_URL".to_string(),
1960 Provider::XaiGrok => "GROK_BASE_URL".to_string(),
1961 Provider::Ollama => "OLLAMA_BASE_URL".to_string(),
1962 Provider::DeepSeek => "DEEPSEEK_BASE_URL".to_string(),
1963 Provider::Qwen => "DASHSCOPE_BASE_URL".to_string(),
1964 Provider::BaiduWenxin => "BAIDU_WENXIN_BASE_URL".to_string(),
1965 Provider::TencentHunyuan => "TENCENT_HUNYUAN_BASE_URL".to_string(),
1966 Provider::IflytekSpark => "IFLYTEK_BASE_URL".to_string(),
1967 Provider::Moonshot => "MOONSHOT_BASE_URL".to_string(),
1968 Provider::Anthropic => "ANTHROPIC_BASE_URL".to_string(),
1969 Provider::AzureOpenAI => "AZURE_OPENAI_BASE_URL".to_string(),
1970 Provider::HuggingFace => "HUGGINGFACE_BASE_URL".to_string(),
1971 Provider::TogetherAI => "TOGETHER_BASE_URL".to_string(),
1972 Provider::OpenRouter => "OPENROUTER_BASE_URL".to_string(),
1973 Provider::Replicate => "REPLICATE_BASE_URL".to_string(),
1974 Provider::ZhipuAI => "ZHIPU_BASE_URL".to_string(),
1975 Provider::MiniMax => "MINIMAX_BASE_URL".to_string(),
1976 Provider::Perplexity => "PERPLEXITY_BASE_URL".to_string(),
1977 Provider::AI21 => "AI21_BASE_URL".to_string(),
            // These providers don't read a base_url environment variable
1979 Provider::OpenAI | Provider::Gemini | Provider::Mistral | Provider::Cohere => {
1980 "".to_string()
1981 }
1982 }
1983 }
1984
1985 /// Get default provider configuration
1986 fn get_default_provider_config(
1987 &self,
1988 ) -> Result<crate::provider::config::ProviderConfig, AiLibError> {
1989 self.provider.get_default_config()
1990 }
1991
1992 /// Create custom ProviderConfig
1993 fn create_custom_config(
1994 &self,
1995 base_url: String,
1996 ) -> Result<crate::provider::config::ProviderConfig, AiLibError> {
1997 let mut config = self.get_default_provider_config()?;
1998 config.base_url = base_url;
1999 Ok(config)
2000 }
2001
2002 /// Create custom HttpTransport
2003 fn create_custom_transport(
2004 &self,
2005 proxy_url: Option<String>,
2006 timeout: std::time::Duration,
2007 ) -> Result<Option<crate::transport::DynHttpTransportRef>, AiLibError> {
2008 // If no custom configuration, return None (use default transport)
2009 if proxy_url.is_none() && self.pool_max_idle.is_none() && self.pool_idle_timeout.is_none() {
2010 return Ok(None);
2011 }
2012
2013 // Create custom HttpTransportConfig
2014 let transport_config = crate::transport::HttpTransportConfig {
2015 timeout,
2016 proxy: proxy_url,
2017 pool_max_idle_per_host: self.pool_max_idle,
2018 pool_idle_timeout: self.pool_idle_timeout,
2019 };
2020
2021 // Create custom HttpTransport
2022 let transport = crate::transport::HttpTransport::new_with_config(transport_config)?;
2023 Ok(Some(transport.boxed()))
2024 }
2025}
2026
2027/// Controllable streaming response
2028struct ControlledStream {
2029 inner: Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>,
2030 cancel_rx: Option<oneshot::Receiver<()>>,
2031 // Hold a backpressure permit for the lifetime of the stream if present
2032 _bp_permit: Option<BackpressurePermit>,
2033}
2034
2035impl ControlledStream {
2036 fn new_with_bp(
2037 inner: Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>,
2038 cancel_rx: Option<oneshot::Receiver<()>>,
2039 bp_permit: Option<BackpressurePermit>,
2040 ) -> Self {
2041 Self { inner, cancel_rx, _bp_permit: bp_permit }
2042 }
2043}
2044
2045impl Stream for ControlledStream {
2046 type Item = Result<ChatCompletionChunk, AiLibError>;
2047
2048 fn poll_next(
2049 mut self: std::pin::Pin<&mut Self>,
2050 cx: &mut std::task::Context<'_>,
2051 ) -> std::task::Poll<Option<Self::Item>> {
2052 use futures::stream::StreamExt;
2053 use std::task::Poll;
2054
2055 // Check if cancelled
2056 if let Some(ref mut cancel_rx) = self.cancel_rx {
2057 match Future::poll(std::pin::Pin::new(cancel_rx), cx) {
2058 Poll::Ready(_) => {
2059 self.cancel_rx = None;
2060 return Poll::Ready(Some(Err(AiLibError::ProviderError(
2061 "Stream cancelled".to_string(),
2062 ))));
2063 }
2064 Poll::Pending => {}
2065 }
2066 }
2067
2068 // Poll inner stream
2069 self.inner.poll_next_unpin(cx)
2070 }
2071}