// ultrafast_models_sdk/lib.rs

1//! # Ultrafast Models SDK
2//!
3//! A high-performance Rust SDK for interacting with multiple AI/LLM providers
4//! through a unified interface. The SDK provides seamless integration with
5//! various AI services including OpenAI, Anthropic, Google, and more.
6//!
7//! ## Overview
8//!
9//! The Ultrafast Models SDK provides:
10//! - **Unified Interface**: Single API for multiple AI providers
11//! - **Intelligent Routing**: Automatic provider selection and load balancing
12//! - **Circuit Breakers**: Automatic failover and recovery mechanisms
13//! - **Caching Layer**: Built-in response caching for performance
14//! - **Rate Limiting**: Per-provider rate limiting and throttling
15//! - **Error Handling**: Comprehensive error handling and retry logic
16//! - **Metrics Collection**: Performance monitoring and analytics
17//!
18//! ## Supported Providers
19//!
20//! The SDK supports a wide range of AI providers:
21//!
22//! - **OpenAI**: GPT-4, GPT-3.5, and other OpenAI models
23//! - **Anthropic**: Claude-3, Claude-2, and Claude Instant
24//! - **Google**: Gemini Pro, Gemini Pro Vision, and PaLM
25//! - **Azure OpenAI**: Azure-hosted OpenAI models
26//! - **Ollama**: Local and remote Ollama instances
27//! - **Mistral AI**: Mistral 7B, Mixtral, and other models
28//! - **Cohere**: Command, Command R, and other Cohere models
29//! - **Custom Providers**: Extensible provider system
30//!
31//! ## Quick Start
32//!
33//! ```rust
34//! use ultrafast_models_sdk::{UltrafastClient, ChatRequest, Message};
35//!
36//! #[tokio::main]
37//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
38//!     // Create a client with multiple providers
39//!     let client = UltrafastClient::standalone()
40//!         .with_openai("your-openai-key")
41//!         .with_anthropic("your-anthropic-key")
42//!         .with_ollama("http://localhost:11434")
43//!         .build()?;
44//!
45//!     // Create a chat request
46//!     let request = ChatRequest {
47//!         model: "gpt-4".to_string(),
48//!         messages: vec![Message::user("Hello, world!")],
49//!         temperature: Some(0.7),
50//!         max_tokens: Some(100),
51//!         stream: Some(false),
52//!         ..Default::default()
53//!     };
54//!
55//!     // Send the request
56//!     let response = client.chat_completion(request).await?;
57//!     println!("Response: {}", response.choices[0].message.content);
58//!
59//!     Ok(())
60//! }
61//! ```
62//!
63//! ## Client Modes
64//!
65//! The SDK supports two client modes:
66//!
67//! ### Standalone Mode
68//!
69//! Direct provider communication without gateway:
70//!
71//! ```rust
72//! let client = UltrafastClient::standalone()
73//!     .with_openai("your-key")
74//!     .with_anthropic("your-key")
75//!     .build()?;
76//! ```
77//!
78//! ### Gateway Mode
79//!
80//! Communication through the Ultrafast Gateway:
81//!
82//! ```rust
83//! let client = UltrafastClient::gateway("http://localhost:3000")
84//!     .with_api_key("your-gateway-key")
85//!     .build()?;
86//! ```
87//!
88//! ## Routing Strategies
89//!
90//! The SDK provides multiple routing strategies:
91//!
92//! - **Single Provider**: Route all requests to one provider
93//! - **Load Balancing**: Distribute requests across providers
94//! - **Failover**: Primary provider with automatic fallback
95//! - **Conditional Routing**: Route based on request characteristics
96//! - **A/B Testing**: Route requests for testing different providers
97//!
98//! ```rust
99//! use ultrafast_models_sdk::routing::RoutingStrategy;
100//!
101//! // Load balancing with custom weights
102//! let client = UltrafastClient::standalone()
103//!     .with_openai("openai-key")
104//!     .with_anthropic("anthropic-key")
105//!     .with_routing_strategy(RoutingStrategy::LoadBalance {
106//!         weights: vec![0.6, 0.4], // 60% OpenAI, 40% Anthropic
107//!     })
108//!     .build()?;
109//!
110//! // Failover strategy
111//! let client = UltrafastClient::standalone()
112//!     .with_openai("primary-key")
113//!     .with_anthropic("fallback-key")
//!     .with_routing_strategy(RoutingStrategy::Fallback)
115//!     .build()?;
116//! ```
117//!
118//! ## Advanced Features
119//!
120//! ### Circuit Breakers
121//!
122//! Automatic failover and recovery:
123//!
124//! ```rust
125//! use ultrafast_models_sdk::circuit_breaker::CircuitBreakerConfig;
126//!
127//! let client = UltrafastClient::standalone()
128//!     .with_openai("your-key")
129//!     .with_circuit_breaker_config(CircuitBreakerConfig {
130//!         failure_threshold: 5,
131//!         recovery_timeout: Duration::from_secs(60),
132//!         request_timeout: Duration::from_secs(30),
133//!         half_open_max_calls: 3,
134//!     })
135//!     .build()?;
136//! ```
137//!
138//! ### Caching
139//!
140//! Built-in response caching:
141//!
142//! ```rust
143//! use ultrafast_models_sdk::cache::CacheConfig;
144//!
145//! let client = UltrafastClient::standalone()
146//!     .with_openai("your-key")
147//!     .with_cache_config(CacheConfig {
148//!         enabled: true,
//!         ttl: Duration::from_secs(60 * 60),
150//!         max_size: 1000,
151//!     })
152//!     .build()?;
153//! ```
154//!
155//! ### Rate Limiting
156//!
157//! Per-provider rate limiting:
158//!
159//! ```rust
160//! use ultrafast_models_sdk::rate_limiting::RateLimitConfig;
161//!
162//! let client = UltrafastClient::standalone()
163//!     .with_openai("your-key")
164//!     .with_rate_limit_config(RateLimitConfig {
165//!         requests_per_minute: 100,
166//!         tokens_per_minute: 10000,
167//!         burst_size: 10,
168//!     })
169//!     .build()?;
170//! ```
171//!
172//! ## API Examples
173//!
174//! ### Chat Completions
175//!
176//! ```rust
177//! use ultrafast_models_sdk::{ChatRequest, Message, Role};
178//!
179//! let request = ChatRequest {
180//!     model: "gpt-4".to_string(),
181//!     messages: vec![
182//!         Message {
183//!             role: Role::System,
184//!             content: "You are a helpful assistant.".to_string(),
185//!         },
186//!         Message {
187//!             role: Role::User,
188//!             content: "What is the capital of France?".to_string(),
189//!         },
190//!     ],
191//!     temperature: Some(0.7),
192//!     max_tokens: Some(150),
193//!     stream: Some(false),
194//!     ..Default::default()
195//! };
196//!
197//! let response = client.chat_completion(request).await?;
198//! println!("Response: {}", response.choices[0].message.content);
199//! ```
200//!
201//! ### Streaming Responses
202//!
203//! ```rust
204//! use futures::StreamExt;
205//!
206//! let mut stream = client
207//!     .stream_chat_completion(ChatRequest {
208//!         model: "gpt-4".to_string(),
209//!         messages: vec![Message::user("Tell me a story")],
210//!         stream: Some(true),
211//!         ..Default::default()
212//!     })
213//!     .await?;
214//!
215//! while let Some(chunk) = stream.next().await {
216//!     match chunk {
217//!         Ok(chunk) => {
218//!             if let Some(content) = &chunk.choices[0].delta.content {
219//!                 print!("{}", content);
220//!             }
221//!         }
222//!         Err(e) => eprintln!("Error: {:?}", e),
223//!     }
224//! }
225//! ```
226//!
227//! ### Embeddings
228//!
229//! ```rust
230//! use ultrafast_models_sdk::{EmbeddingRequest, EmbeddingInput};
231//!
232//! let request = EmbeddingRequest {
233//!     model: "text-embedding-ada-002".to_string(),
234//!     input: EmbeddingInput::String("This is a test sentence.".to_string()),
235//!     ..Default::default()
236//! };
237//!
238//! let response = client.embedding(request).await?;
239//! println!("Embedding dimensions: {}", response.data[0].embedding.len());
240//! ```
241//!
242//! ### Image Generation
243//!
244//! ```rust
//! use ultrafast_models_sdk::ImageRequest;
//!
//! let request = ImageRequest {
248//!     model: "dall-e-3".to_string(),
249//!     prompt: "A beautiful sunset over the ocean".to_string(),
250//!     n: Some(1),
251//!     size: Some("1024x1024".to_string()),
252//!     ..Default::default()
253//! };
254//!
255//! let response = client.generate_image(request).await?;
256//! println!("Image URL: {}", response.data[0].url);
257//! ```
258//!
259//! ## Error Handling
260//!
261//! Comprehensive error handling with specific error types:
262//!
263//! ```rust
264//! use ultrafast_models_sdk::error::UltrafastError;
265//!
266//! match client.chat_completion(request).await {
267//!     Ok(response) => println!("Success: {:?}", response),
268//!     Err(UltrafastError::AuthenticationError { .. }) => {
269//!         eprintln!("Authentication failed");
270//!     }
271//!     Err(UltrafastError::RateLimitExceeded { retry_after, .. }) => {
272//!         eprintln!("Rate limit exceeded, retry after: {:?}", retry_after);
273//!     }
274//!     Err(UltrafastError::ProviderError { provider, message, .. }) => {
275//!         eprintln!("Provider {} error: {}", provider, message);
276//!     }
277//!     Err(e) => eprintln!("Other error: {:?}", e),
278//! }
279//! ```
280//!
281//! ## Configuration
282//!
283//! Advanced client configuration:
284//!
285//! ```rust
286//! use ultrafast_models_sdk::{UltrafastClient, ClientConfig};
287//! use std::time::Duration;
288//!
289//! let config = ClientConfig {
290//!     timeout: Duration::from_secs(30),
291//!     max_retries: 3,
292//!     retry_delay: Duration::from_secs(1),
293//!     user_agent: Some("MyApp/1.0".to_string()),
294//!     ..Default::default()
295//! };
296//!
297//! let client = UltrafastClient::standalone()
298//!     .with_config(config)
299//!     .with_openai("your-key")
300//!     .build()?;
301//! ```
302//!
303//! ## Testing
304//!
305//! The SDK includes testing utilities:
306//!
307//! ```rust
308//! #[cfg(test)]
309//! mod tests {
310//!     use super::*;
//!
//!     #[tokio::test]
314//!     async fn test_chat_completion() {
315//!         let client = UltrafastClient::standalone()
316//!             .with_openai("test-key")
317//!             .build()
318//!             .unwrap();
319//!
320//!         let request = ChatRequest {
321//!             model: "gpt-4".to_string(),
322//!             messages: vec![Message::user("Hello")],
323//!             ..Default::default()
324//!         };
325//!
326//!         let result = client.chat_completion(request).await;
327//!         assert!(result.is_ok());
328//!     }
329//! }
330//! ```
331//!
332//! ## Performance Optimization
333//!
334//! Tips for optimal performance:
335//!
336//! ```rust
337//! // Use connection pooling
338//! let client = UltrafastClient::standalone()
339//!     .with_connection_pool_size(10)
340//!     .with_openai("your-key")
341//!     .build()?;
342//!
343//! // Enable compression
344//! let client = UltrafastClient::standalone()
345//!     .with_compression(true)
346//!     .with_openai("your-key")
347//!     .build()?;
348//!
349//! // Configure timeouts
350//! let client = UltrafastClient::standalone()
351//!     .with_timeout(Duration::from_secs(15))
352//!     .with_openai("your-key")
353//!     .build()?;
354//! ```
355//!
356//! ## Migration Guide
357//!
358//! ### From OpenAI SDK
359//!
360//! ```rust
361//! // Before (OpenAI SDK)
362//! use openai::Client;
363//! let client = Client::new("your-key");
364//! let response = client.chat().create(request).await?;
365//!
366//! // After (Ultrafast SDK)
367//! use ultrafast_models_sdk::UltrafastClient;
368//! let client = UltrafastClient::standalone()
369//!     .with_openai("your-key")
370//!     .build()?;
371//! let response = client.chat_completion(request).await?;
372//! ```
373//!
374//! ### From Anthropic SDK
375//!
376//! ```rust
377//! // Before (Anthropic SDK)
378//! use anthropic::Client;
379//! let client = Client::new("your-key");
380//! let response = client.messages().create(request).await?;
381//!
382//! // After (Ultrafast SDK)
383//! use ultrafast_models_sdk::UltrafastClient;
384//! let client = UltrafastClient::standalone()
385//!     .with_anthropic("your-key")
386//!     .build()?;
387//! let response = client.chat_completion(request).await?;
388//! ```
389//!
390//! ## Contributing
391//!
392//! We welcome contributions! Please see our contributing guide for details on:
393//!
394//! - Code style and formatting
395//! - Testing requirements
396//! - Documentation standards
397//! - Pull request process
398//!
399//! ## License
400//!
401//! This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
402//!
403//! ## Support
404//!
405//! For support and questions:
406//!
407//! - **Issues**: [GitHub Issues](https://github.com/techgopal/ultrafast-ai-gateway/issues)
408//! - **Discussions**: [GitHub Discussions](https://github.com/techgopal/ultrafast-ai-gateway/discussions)
409//! - **Documentation**: [Project Wiki](https://github.com/techgopal/ultrafast-ai-gateway/wiki)
410
411pub mod cache;
412pub mod circuit_breaker;
413pub mod client;
414pub mod common;
415pub mod error;
416pub mod models;
417pub mod providers;
418pub mod routing;
419
420pub use circuit_breaker::{CircuitBreaker, CircuitBreakerConfig, CircuitState};
421pub use client::{ClientMode, UltrafastClient, UltrafastClientBuilder};
422pub use error::{ClientError, ProviderError};
423pub use models::{
424    AudioRequest, AudioResponse, ChatRequest, ChatResponse, Choice, EmbeddingRequest,
425    EmbeddingResponse, ImageRequest, ImageResponse, Message, Role, SpeechRequest, SpeechResponse,
426    Usage,
427};
428pub use providers::{
429    create_provider_with_circuit_breaker, Provider, ProviderConfig, ProviderMetrics,
430};
431pub use routing::{Condition, RoutingRule, RoutingStrategy};
432
/// Result type for SDK operations.
///
/// This is a convenience type alias for SDK operations that can fail.
/// It fixes the error type to [`ClientError`], so fallible SDK calls can be
/// chained with the `?` operator without spelling out the error type.
pub type Result<T> = std::result::Result<T, ClientError>;
438
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{ChatRequest, Message, Role};

    // Each convenience constructor must tag the message with the matching
    // role and preserve the content verbatim.
    #[test]
    fn test_message_creation() {
        let from_user = Message::user("Hello, world!");
        assert_eq!(from_user.role, Role::User);
        assert_eq!(from_user.content, "Hello, world!");

        let from_assistant = Message::assistant("Hi there!");
        assert_eq!(from_assistant.role, Role::Assistant);
        assert_eq!(from_assistant.content, "Hi there!");

        let from_system = Message::system("You are a helpful assistant.");
        assert_eq!(from_system.role, Role::System);
        assert_eq!(from_system.content, "You are a helpful assistant.");
    }

    // A default-constructed request is completely empty: no model, no
    // messages, and none of the optional tuning parameters set.
    #[test]
    fn test_chat_request_default() {
        let req = ChatRequest::default();
        assert_eq!(req.model, "");
        assert_eq!(req.messages.len(), 0);
        assert_eq!(req.temperature, None);
        assert_eq!(req.max_tokens, None);
        assert_eq!(req.stream, None);
    }

    // `ProviderConfig::new` stores the supplied identity and applies the
    // defaults the SDK documents: 30s timeout, 3 retries, enabled.
    #[test]
    fn test_provider_config_creation() {
        let cfg = ProviderConfig::new("test-provider", "test-key");
        assert_eq!(cfg.name, "test-provider");
        assert_eq!(cfg.api_key, "test-key");
        assert_eq!(cfg.timeout.as_secs(), 30);
        assert_eq!(cfg.max_retries, 3);
        assert!(cfg.enabled);
    }

    // Every routing strategy variant can be constructed and recognized by
    // pattern matching.
    #[test]
    fn test_routing_strategy_creation() {
        assert!(matches!(RoutingStrategy::Single, RoutingStrategy::Single));
        assert!(matches!(
            RoutingStrategy::Fallback,
            RoutingStrategy::Fallback
        ));
        assert!(matches!(
            RoutingStrategy::LoadBalance {
                weights: vec![0.5, 0.5],
            },
            RoutingStrategy::LoadBalance { .. }
        ));
        assert!(matches!(
            RoutingStrategy::Conditional { rules: vec![] },
            RoutingStrategy::Conditional { .. }
        ));
        assert!(matches!(
            RoutingStrategy::ABTesting { split: 0.5 },
            RoutingStrategy::ABTesting { .. }
        ));
    }

    // A context that satisfies each condition (identical model/region,
    // size and token counts above the thresholds) must match.
    #[test]
    fn test_condition_matching() {
        let ctx = routing::RoutingContext {
            model: Some("gpt-4".to_string()),
            user_region: Some("us-east-1".to_string()),
            request_size: 1000,
            estimated_tokens: 500,
            user_id: Some("user123".to_string()),
            metadata: std::collections::HashMap::new(),
        };

        assert!(Condition::ModelName("gpt-4".to_string()).matches(&ctx));
        assert!(Condition::UserRegion("us-east-1".to_string()).matches(&ctx));
        assert!(Condition::RequestSize(500).matches(&ctx));
        assert!(Condition::TokenCount(300).matches(&ctx));
    }
}
519}